/*
 * Copyright (c) 1991-1995 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the Computer Systems
 *      Engineering Group at Lawrence Berkeley Laboratory.
 * 4. Neither the name of the University nor of the Laboratory may be used
 *    to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/* Revision-control identification string embedded in the object file. */
static const char rcsid[] =
    "@(#) $Header: controller.cc,v 1.47 96/05/16 05:20:07 van Exp $ (LBL)";

#define AUDIO_SPS 8000  /* audio samples per second (used to convert
                         * playout delay from seconds to bytes of buffer).
                         * This is the sample rate used by the audio
                         * hardware.  If this rate is different than
                         * the sample rate of some network audio format,
                         * sample rate conversion has to be done between
                         * the encoder/decoder objects & here.  Note
                         * that sample rate conversion is a very compute
                         * intensive operation & there isn't currently
                         * any support for it in vat.  Also note that
                         * there are many control variables expressed
                         * in terms of a 160 sample (20ms at 8KHz)
                         * audio frame size and some of these need to
                         * be changed if the AUDIO_SPS is changed. */

#define SS_GRANULARITY 1440 /* sample stream sizes are always rounded
                             * to some integer multiple of this number.
                             * It should be set to the least common
                             * multiple of the possible *output* audio
                             * frame sizes (see comments below).
                             * In our case, possible frame sizes are
                             * 160 & 180 samples. */

/*
 * following is timeout interval (in ms) when vat does not have
 * the audio & is running off a system timer instead.  This interval
 * must be the same as the audio read blocksize (FRAMESIZE /
 * AUDIO_SPS * 1000 ms) and must be < 1 sec.
 */
#define FRAME_TIME (FRAMESIZE * 1000 / AUDIO_SPS)

#include "config.h"
#include "sys-time.h"
#include "audio.h"
#include "ss.h"
#include "controller.h"
#include "encoder.h"
#include "mulaw.h"
#include "vu.h"
#include "Tcl.h"
#include "ntp-time.h"

/*
 * The VU meters are refreshed once every METER_UPDATE_FREQ passes
 * through Controller::DoAudio() rather than on every pass.
 */
#define METER_UPDATE_FREQ 3

/*
 * A controller class specifically for handling half-duplex audio
 * devices like those commonly found in PCs.  It derives its timebase
 * from a 20ms system timer rather than using audio read completions
 * like the normal controller class.
*/ class HDController : public Controller { public: HDController(); virtual void update(Observable*); protected: virtual void timeout(); virtual void audio_handle(); }; static class ControllerMatch : public Matcher { public: ControllerMatch() : Matcher("controller") {} TclObject* match(const char* id) { if (strcasecmp(id, "full-duplex") == 0) return (new Controller); else if (strcasecmp(id, "half-duplex") == 0) return (new HDController); else return (0); } } controller_matcher; Controller::Controller() : audio_(0), lastnetout_(0), lastaudout_(0), #ifdef WIN32 /* windows is very, very slow & can't hack 20ms events */ timer_interval_(FRAME_TIME * 4), #else timer_interval_(FRAME_TIME), #endif tsec_(0), tusec_(0), ostate_(TALK_TAIL + TALK_LEAD), pmeter_(0), rmeter_(0), talk_thresh_(0), echo_thresh_(0), echo_suppress_time_(0), meter_update_(METER_UPDATE_FREQ), active_(0), encoder_(0), outmax_(FRAMESIZE), outlen_(0), out_ts_(0), out_(0) { Tcl& tcl = Tcl::instance(); echo_thresh_ = atoi(tcl.attr("echoThresh")); echo_suppress_time_ = atoi(tcl.attr("echoSuppressTime")) / 20 * FRAMESIZE; idle_drop_time_ = atoi(tcl.attr("idleDropTime")) * 50; int maxplayout = atoi(tcl.attr("maxPlayout")) * AUDIO_SPS; if (maxplayout < (TALK_LEAD + TALK_TAIL + 2) * FRAMESIZE) { printf(" max playout delay %d too short - using %d sec.\n", maxplayout, ((TALK_LEAD + TALK_TAIL + 2) * FRAMESIZE + AUDIO_SPS - 1) / AUDIO_SPS); maxplayout = (TALK_LEAD + TALK_TAIL + 2) * FRAMESIZE; } maxplayout = (maxplayout + (SS_GRANULARITY - 1)) / SS_GRANULARITY; maxplayout *= SS_GRANULARITY / FRAMESIZE; /*XXX tk script */ int magclevel = atoi(tcl.attr("mikeAGCLevel")); int sagclevel = atoi(tcl.attr("speakerAGCLevel")); as_ = new SampleStream(FRAMESIZE, maxplayout, (TALK_LEAD+1)*FRAMESIZE, magclevel); ns_ = new SampleStream(FRAMESIZE, maxplayout, (TALK_LEAD+1)*FRAMESIZE, sagclevel); lastaudout_ = as_->Clock(); lastnetout_ = ns_->Clock(); int thresh = atoi(tcl.attr("silenceThresh")); as_->ssthresh(thresh); 
ns_->ssthresh(thresh); } void Controller::update(Observable*) { if (!audio_->HaveAudio()) { timeval tv; ::gettimeofday(&tv, 0); tsec_ = tv.tv_sec; tusec_ = tv.tv_usec; /* Reset the meters and force a redraw. */ if (pmeter_ != 0) { pmeter_->set(0.); pmeter_->redraw(); rmeter_->set(0.); rmeter_->redraw(); } msched(timer_interval_); } else { cancel(); } } void Controller::DoAudio() { double rlevel, plevel; register int nsmean = ns_->LTMean(); if (ns_->Max()) lastnetout_ = as_->Clock(); ns_->Compute(); plevel = ns_->Mean(); mixaudio(*ns_); ns_->UpdateAGC(); if (audio_->RMuted()) { rlevel = 0.; } else { as_->Compute(); rlevel = as_->Mean(); /* * Next check is to cut residual echo in speakerphone & * echo-cancel modes: If we haven't been talking recently, have * just sent data to the speaker and the signal we got from the * mike was about the same as the signal we played, assume the * mike signal is echo. NOTE that nsmean was grabbed before * ns_->Compute() so we are comparing the previous audio output * to the current audio input - this should make it more likely * that we're looking at the echo signal (though under Sun OS * there's still a lot of buffer in the STREAM I/O system). */ if (audio_->Mode() != Audio::mode_none && !sending() && (as_->LTMean() - nsmean < echo_thresh_ || as_->Silent(talk_thresh_))) ; else if (audio_->Mode() != Audio::mode_netmutesmike || lastnetout_ == 0 || u_int(as_->Clock() - lastnetout_) > echo_suppress_time_) { if (Output()) { lastnetout_ = 0; active_ = 1; as_->UpdateAGC(); } } } /* * Update the meters. We control the rate with METER_UPDATE_FREQ * to cut down on CPU load from the X window updates. */ if (pmeter_ != 0 && --meter_update_ <= 0) { pmeter_->set(plevel); rmeter_->set(rlevel); meter_update_ = METER_UPDATE_FREQ; } } void Controller::DoTimer() { /* Advance to next audio frame */ as_->Advance(); ns_->Advance(); } /* * Called from decoders to mix in the block of samples 'del' * samples into the future. 
*/ void Controller::mix_from_net(int del, const u_int8_t* frame, int len) { if (del >= 0) ns_->Mix(del, frame, len); } void Controller::audio_handle() { DoAudio(); DoTimer(); } /* * Called when we don't have the audio device. Normally, our time * base comes from the audio device's sample clock, so when we don't * have the device open, we revert to timers. */ void Controller::timeout() { /* * Use get time of day and keep track of the current hard time * in the tsec_/tusec_ variables. Tk timers are an unreliable * time base. We use them to dispatch an event here then call * gettimeofday to see how many times we really should have * been called in the intervening period. */ timeval tv; ::gettimeofday(&tv, 0); u_int u = (u_int)tv.tv_usec; u_int s = (u_int)tv.tv_sec; if (s > tsec_ + 3) { /* * We're way behind. Most likely we were suspended and * then resumed. Instead of trying to catch up, just resync. */ tusec_ = u; tsec_ = s; } while ((int(tusec_ - u) <= 0 && s == tsec_) || int(tsec_ - s) < 0) { DoTimer(); while ((tusec_ += 1000 * FRAME_TIME) >= 1000000) { tusec_ -= 1000000; ++tsec_; } } msched(timer_interval_); } /* * send the next block of samples to the encoder. We might coalesce * several calls into a larger chunk (depending on the value of outmax_). * The point is to stuff more data into a single packet to amortize the * packet header overhead (at the cost of increased latency). */ void Controller::send_block(u_int32_t ts, u_int8_t* blk, int len) { if (out_ == 0) { out_ = blk; outlen_ = 0; out_ts_ = ts; } else if (&out_[outlen_] != blk) { /* * frames wrapped in ss buffer -- copy to buf to * keep things contiguous. 
*/ if (out_ != overflow_) { /* first time */ memcpy(overflow_, out_, outlen_); out_ = overflow_; } /* copy current chunk */ memcpy(&out_[outlen_], blk, len); } outlen_ += len; if (outlen_ >= outmax_) { encoder_->encode(out_ts_, out_, outlen_); out_ = 0; } } /* * get the current media timestamp */ u_int32_t Controller::media_ts() { ::gettimeofday(&last_uts_, 0); u_int32_t ts = as_->Clock(); last_mts_ = ts; return (ts); } u_int32_t Controller::ref_ts() { timeval now; ::gettimeofday(&now, 0); int t = (now.tv_sec - last_uts_.tv_sec) * 8000; t += ((now.tv_usec - last_uts_.tv_usec) << 3) / 1000; return (last_mts_ + t); } /* * Check for silence and otherwise send audio frames to the encoder. * If this is the start of a talk-spurt, go back TALK_LEAD blocks into * the past and send them too (because the silence detector isn't * perfect especially near a silence-to-speech transition). At the * end of a talk-spurt, send TALK_TAIL extra blocks because the * silence detector is unreliable near a speech-to-silence transition. */ int Controller::Output() { register int bs = as_->BlkSize(); if (as_->Silent()) { if (ostate_ >= TALK_TAIL) { /* between talk spurts */ if (out_ != 0) { /* flush the partial last block */ send_block(media_ts(), as_->BlkBack(0), bs); return (1); } if (ostate_ < TALK_TAIL+TALK_LEAD) ++ostate_; return (0); } ++ostate_; } else if (ostate_) { /* * if start of talk after silence, generate packets for * any leading speech that we might have missed. */ for (int i = (ostate_ - TALK_TAIL) * bs; i > 0; i -= bs) { u_int32_t ts = as_->Clock() - i; send_block(ts, as_->BlkBack(i), bs); } ostate_ = 0; } send_block(media_ts(), as_->BlkBack(0), bs); return (1); } extern "C" { extern u_char tonemax[]; extern u_char tone0dBm[]; extern u_char tone6dBm[]; } void Controller::mixaudio(SampleStream& ss) { /* * The audio driver is ready to give us the next packet. * This serves as our time base. We do the following: * - output next chunk to audio driver. 
* (We do the output *first* so the rest of our * processing is overlapped with the real-time audio * output, otherwise we tend to accumulate estimate * random delays.) * - If we're doing echo cancellation, estimate the * echo resulting from the block just output and mix * the echo inverse into the *input* sample stream * at the estimated echo delay. * - read packet from audio * - Mix the input data into the input sample stream * (this is a mix because we might have an inverse * echo signal or tone that we want summed with the * input). */ u_char* blk = 0; int loopback = audio_->GetLoopback(); /*XXX*/ #ifdef notdef int blksize = audio_->BlockSize(); #else int blksize = as_->BlkSize(); #endif if (loopback != Audio::loop_none) { blk = audio_->Read(); switch (loopback) { case Audio::loop_none: break; case Audio::loop_mike: ss.Mix(0, blk, blksize); break; case Audio::loop_tone6: ss.Mix(0, tone6dBm, blksize); break; case Audio::loop_tone0: ss.Mix(0, tone0dBm, blksize); break; case Audio::loop_tonemax: ss.Mix(0, tonemax, blksize); break; } } /* * Now write a block of samples to the audio device provided * the following conditions hold: * * (1) the audio output isn't muted * * (2) there won't be an echo problem i.e., we're not sending, OR * we're in not in the mode where input has priority over output, * OR the mike is muted. this last check is not strictly * necessary but allows the user to immediately hear the * far end after muting the mike (rather than waiting for * TALK_TAIL extra audio blocks to drain). * * (3) the signal to output isn't slience, or we're running * a loopback test. i.e., if we're about to write a * completely silent frame (i.e., no packets at all from * the network) then don't do it. This prevents a backlog * of samples (i.e., a net delay) to build up in the audio * driver. Note that this problem is completely independent * of the silent suppression solution for the outbound path. */ if (! 
audio_->PMuted() && (!sending() || audio_->Mode() != Audio::mode_mikemutesnet || audio_->RMuted()) && (loopback || (ss.Max() != 0 && ss.LTMean() != 0))) { /* * if we haven't written for a while, * write an extra block to generate a bit * of a backlog between us & the driver. */ u_int32_t sc = as_->Clock(); if (u_int(sc - lastaudout_) > 4*FRAMESIZE) audio_->Write(ss.BlkBack(blksize)); audio_->Write(ss.CurBlk()); lastaudout_ = sc; active_ = 1; #ifdef notyet if (mode == mode_ec && !rmute) { int offset = AdjustTime(0); u_char ecblk[MAXAUDIOSIZE+4]; int resid = offset & 3; if (resid) ecblk[0] = 0x7f7f7f7f; filter->Compute(os, &ecblk[resid], blksize); as->Mix(offset &~ 3, ecblk, blksize); } #endif } if (loopback == Audio::loop_none) blk = audio_->Read(); as_->Mix(0, blk, blksize); } HDController::HDController() { timeval tv; ::gettimeofday(&tv, 0); tsec_ = tv.tv_sec; tusec_ = tv.tv_usec; msched(timer_interval_); } void HDController::update(Observable*) { /* * do nothing -- leave the timer running even when * we have the audio */ } void HDController::timeout() { timeval tv; ::gettimeofday(&tv, 0); u_int u = (u_int)tv.tv_usec; u_int s = (u_int)tv.tv_sec; if (s > tsec_ + 3) { /* * We're way behind. Most likely we were suspended and * then resumed. Instead of trying to catch up, just resync. 
*/ tusec_ = u; tsec_ = s; } while ((int(tusec_ - u) <= 0 && s == tsec_) || int(tsec_ - s) < 0) { DoTimer(); if (audio_->HaveAudio() && audio_->FrameReady()) DoAudio(); while ((tusec_ += 1000 * FRAME_TIME) >= 1000000) { tusec_ -= 1000000; ++tsec_; } } msched(timer_interval_); } void HDController::audio_handle() { printf("HDController::audio_handle()\n"); } int Controller::command(int argc, const char*const* argv) { Tcl& tcl = Tcl::instance(); if (argc == 2) { if (strcmp(argv[1], "ntp-time") == 0) { sprintf(tcl.buffer(), "%u", ntptime()); tcl.result(tcl.buffer()); return (TCL_OK); } if (strcmp(argv[1], "unix-time") == 0) { sprintf(tcl.buffer(), "%u", unixtime().tv_sec); tcl.result(tcl.buffer()); return (TCL_OK); } if (strcmp(argv[1], "media-time") == 0) { sprintf(tcl.buffer(), "%u", as_->Clock()); tcl.result(tcl.buffer()); return (TCL_OK); } if (strcmp(argv[1], "active") == 0) { tcl.result(active_ ? "1" : "0"); return (TCL_OK); } if (strcmp(argv[1], "agc-input") == 0) { sprintf(tcl.buffer(), "%d", as_->AGCLevel() / 10 - 10); tcl.result(tcl.buffer()); return (TCL_OK); } if (strcmp(argv[1], "agc-output") == 0) { sprintf(tcl.buffer(), "%d", ns_->AGCLevel() / 10 - 10); tcl.result(tcl.buffer()); return (TCL_OK); } } else if (argc == 3) { if (strcmp(argv[1], "audio") == 0) { audio_ = (Audio*)TclObject::lookup(argv[2]); audio_->attach(this); audio_->handler(this); update(audio_); return (TCL_OK); } if (strcmp(argv[1], "encoder") == 0) { encoder_ = (PCM_Encoder*)TclObject::lookup(argv[2]); return (TCL_OK); } if (strcmp(argv[1], "input-meter") == 0) { rmeter_ = (VUMeter*)TclObject::lookup(argv[2]); return (TCL_OK); } if (strcmp(argv[1], "output-meter") == 0) { pmeter_ = (VUMeter*)TclObject::lookup(argv[2]); return (TCL_OK); } if (strcmp(argv[1], "silence-thresh") == 0) { int thresh = atoi(argv[2]); as_->ssthresh(thresh); ns_->ssthresh(thresh); return (TCL_OK); } if (strcmp(argv[1], "talk-thresh") == 0) { talk_thresh_ = atoi(argv[2]); return (TCL_OK); } if (strcmp(argv[1], 
"echothresh") == 0) { echo_thresh_ = atoi(argv[2]); return (TCL_OK); } if (strcmp(argv[1], "echodelay") == 0) { echo_suppress_time_ = atoi(argv[2]) / 20 * FRAMESIZE; return (TCL_OK); } if (strcmp(argv[1], "blocks-per-packet") == 0) { outmax_ = atoi(argv[2]) * FRAMESIZE; return (TCL_OK); } if (strcmp(argv[1], "agc-input") == 0) { int level = atoi(argv[2]); level = 10 * (level + 10); as_->SetAGCLevel(level); return (TCL_OK); } if (strcmp(argv[1], "agc-input-enable") == 0) { as_->DoAGC(atoi(argv[2])); return (TCL_OK); } if (strcmp(argv[1], "agc-output") == 0) { int level = atoi(argv[2]); level = 10 * (level + 10); ns_->SetAGCLevel(level); return (TCL_OK); } if (strcmp(argv[1], "agc-output-enable") == 0) { ns_->DoAGC(atoi(argv[2])); return (TCL_OK); } if (strcmp(argv[1], "active") == 0) { active_ = atoi(argv[2]); return (TCL_OK); } } return (TclObject::command(argc, argv)); }