Ways to capture incoming WebRTC video streams (client side) - google-chrome

I am currently looking to find a best way to store a incoming webrtc video streams. I am joining the videocall using webrtc (via chrome) and I would like to record every incoming video stream to from each participant to the browser.
The solutions I am researching are:
Intercept network packets coming to the browsers e.g. using Whireshark and then decode. Following this article: https://webrtchacks.com/video_replay/
Modifying a browser to store recording as a file e.g. by modifying Chromium itself
Any screen-recorders or using solutions like xvfb & ffmpeg is not an options due the resources constrains. Is there any other way that could let me capture packets or encoded video as a file? The solution must be working on Linux.

if the media stream is what you want a method is to override the browser's PeerConnection. Here is an example:
In an extension manifest add the following content script:
content_scripts": [
{
"matches": ["http://*/*", "https://*/*"],
"js": ["payload/inject.js"],
"all_frames": true,
"match_about_blank": true,
"run_at": "document_start"
}
]
inject.js
var inject = '('+function() {
//overide the browser's default RTCPeerConnection.
var origPeerConnection = window.RTCPeerConnection || window.webkitRTCPeerConnection || window.mozRTCPeerConnection;
//make sure it is supported
if (origPeerConnection) {
//our own RTCPeerConnection
var newPeerConnection = function(config, constraints) {
console.log('PeerConnection created with config', config);
//proxy the orginal peer connection
var pc = new origPeerConnection(config, constraints);
//store the old addStream
var oldAddStream = pc.addStream;
//addStream is called when a local stream is added.
//arguments[0] is a local media stream
pc.addStream = function() {
console.log("our add stream called!")
//our mediaStream object
console.dir(arguments[0])
return oldAddStream.apply(this, arguments);
}
//ontrack is called when a remote track is added.
//the media stream(s) are located in event.streams
pc.ontrack = function(event) {
console.log("ontrack got a track")
console.dir(event);
}
window.ourPC = pc;
return pc;
};
['RTCPeerConnection', 'webkitRTCPeerConnection', 'mozRTCPeerConnection'].forEach(function(obj) {
// Override objects if they exist in the window object
if (window.hasOwnProperty(obj)) {
window[obj] = newPeerConnection;
// Copy the static methods
Object.keys(origPeerConnection).forEach(function(x){
window[obj][x] = origPeerConnection[x];
})
window[obj].prototype = origPeerConnection.prototype;
}
});
}
}+')();';
var script = document.createElement('script');
script.textContent = inject;
(document.head||document.documentElement).appendChild(script);
script.parentNode.removeChild(script);
I tested this with a voice call in google hangouts and saw that two mediaStreams where added via pc.addStream and one track was added via pc.ontrack. addStream would seem to be local media streams and the event object in ontrack is a RTCTrackEvent which has a streams object. which I assume are what you are looking for.
To access these streams from your extenion's content script you will need to create audio elements and set the "srcObject" property to the media stream: e.g.
pc.ontrack = function(event) {
//check if our element exists
var elm = document.getElementById("remoteStream");
if(elm == null) {
//create an audio element
elm = document.createElement("audio");
elm.id = "remoteStream";
}
//set the srcObject to our stream. not sure if you need to clone it
elm.srcObject = event.streams[0].clone();
//write the elment to the body
document.body.appendChild(elm);
//fire a custom event so our content script knows the stream is available.
// you could pass the id in the "detail" object. for example:
//CustomEvent("remoteStreamAdded", {"detail":{"id":"audio_element_id"}})
//then access if via e.detail.id in your event listener.
var e = CustomEvent("remoteStreamAdded");
window.dispatchEvent(e);
}
Then in your content script you can listen for that event/access the mediastream like so:
window.addEventListener("remoteStreamAdded", function(e) {
elm = document.getElementById("remoteStream");
var stream = elm.captureStream();
})
With the capture stream available to your content script you can do pretty much anything you want with it. For example, MediaRecorder works really well for recording the stream(s) or you could use something like peer.js or maybe binary.js to stream to another source.
I haven't tested this but it should also be possible to override the local streams. For example, in the inject.js you could establish some blank mediastream, override navigator.mediaDevices.getUserMedia and instead of returning the local mediastream return your own mediastream.
This method should work in firefox and maybe others as well assuming you use an extenion/app to load the inject.js script at the start of the document. It being loaded before any of the target's libs is key to making this work.
edited for more detail
edited for even more detail

Capturing packets will only give you the network packets which you would then need to turn into frames and put into a container. A server such as Janus can record videos.
Running headless chrome and using the javascript MediaRecorder API is another option but much more heavy on resources.

Related

Properly using chrome.tabCapture in a manifest v3 extension

Edit:
As the end of the year and the end of Manifest V2 is approaching I did a bit more research on this and found the following workarounds:
The example here that uses the desktopCapture API:
https://github.com/GoogleChrome/chrome-extensions-samples/issues/627
The problem with this approach is that it requires the user to select a capture source via some UI which can be disruptive. The --auto-select-desktop-capture-source command line switch can apparently be used to bypass this but I haven't been able to use it with success.
The example extension here that works around tabCapture not working in
service workers by creating its own inactive tab from
which to access the tabCapture API and record the currently
active tab:
https://github.com/zhw2590582/chrome-audio-capture
So far this seems to be the best solution I've found so far in terms of UX. The background page provided in Manifest V2 is essentially replaced with a phantom tab.
The roundaboutedness of the second solution also seems to suggest that the tabCapture API is essentially not intended for use in Manifest V3, or else there would have been a more straightforward way to use it. I am disappointed that Manifest V3 is being enforced while essentially leaving behind Manifest V2 features such as this one.
Original Post:
I'm trying to write a manifest v3 Chrome extension that captures tab audio. However as far as I can tell, with manifest v3 there are some changes that make this a bit difficult:
Background scripts are replaced by service workers.
Service workers do not have access to the chrome.tabCapture API.
Despite this I managed to get something that nearly works as popup scripts still have access to chrome.tabCapture. However, there is a drawback - the audio of the tab is muted and there doesn't seem to be a way to unmute it. This is what I have so far:
Query the service worker current tab from the popup script.
let tabId;
// Fetch tab immediately
chrome.runtime.sendMessage({command: 'query-active-tab'}, (response) => {
tabId = response.id;
});
This is the service worker, which response with the current tab ID.
chrome.runtime.onMessage.addListener(
(request, sender, sendResponse) => {
// Popup asks for current tab
if (request.command === 'query-active-tab') {
chrome.tabs.query({active: true}, (tabs) => {
if (tabs.length > 0) {
sendResponse({id: tabs[0].id});
}
});
return true;
}
...
Again in the popup script, from a keyboard shortcut command, use chrome.tabCapture.getMediaStreamId to get a media stream ID to be consumed by the current tab, and send that stream ID back to the service worker.
// On command, get the stream ID and forward it back to the service worker
chrome.commands.onCommand.addListener((command) => {
chrome.tabCapture.getMediaStreamId({consumerTabId: tabId}, (streamId) => {
chrome.runtime.sendMessage({
command: 'tab-media-stream',
tabId: tabId,
streamId: streamId
})
});
});
The service worker forwards that stream ID to the content script.
chrome.runtime.onMessage.addListener(
(request, sender, sendResponse) => {
...
// Popup sent back media stream ID, forward it to the content script
if (request.command === 'tab-media-stream') {
chrome.tabs.sendMessage(request.tabId, {
command: 'tab-media-stream',
streamId: request.streamId
});
}
}
);
The content script uses navigator.mediaDevices.getUserMedia to get the stream.
// Service worker sent us the stream ID, use it to get the stream
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
navigator.mediaDevices.getUserMedia({
video: false,
audio: true,
audio: {
mandatory: {
chromeMediaSource: 'tab',
chromeMediaSourceId: request.streamId
}
}
})
.then((stream) => {
// Once we're here, the audio in the tab is muted
// However, recording the audio works!
const recorder = new MediaRecorder(stream);
const chunks = [];
recorder.ondataavailable = (e) => {
chunks.push(e.data);
};
recorder.onstop = (e) => saveToFile(new Blob(chunks), "test.wav");
recorder.start();
setTimeout(() => recorder.stop(), 5000);
});
});
Here is the code that implements the above: https://github.com/killergerbah/-test-tab-capture-extension
This actually does produce a MediaStream, but the drawback is that the sound of the tab is muted. I've tried playing the stream through an audio element, but that seems to do nothing.
Is there a way to obtain a stream of the tab audio in a manifest v3 extension without muting the audio in the tab?
I suspect that this approach might be completely wrong as it's so roundabout, but this is the best I could come up with after reading through the docs and various StackOverflow posts.
I've also read that the tabCapture API is going to be moved for manifest v3 at some point, so maybe the question doesn't even make sense to ask - however if there is a way to still properly use it I would like to know.
I found your post very useful in progressing my implementation of an audio tab recorder.
Regarding the specific muting issue you were running into, I resolved it by looking here: Original audio of tab gets muted while using chrome.tabCapture.capture() and MediaRecorder()
// Service worker sent us the stream ID, use it to get the stream
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
navigator.mediaDevices.getUserMedia({
video: false,
audio: true,
audio: {
mandatory: {
chromeMediaSource: 'tab',
chromeMediaSourceId: request.streamId
}
}
})
.then((stream) => {
// To resolve original audio muting
context = new AudioContext();
var audio = context.createMediaStreamSource(stream);
audio.connect(context.destination);
const recorder = new MediaRecorder(stream);
const chunks = [];
recorder.ondataavailable = (e) => {
chunks.push(e.data);
};
recorder.onstop = (e) => saveToFile(new Blob(chunks), "test.wav");
recorder.start();
setTimeout(() => recorder.stop(), 5000);
});
});
This may not be exactly what you are looking for, but perhaps it may provide some insight.
I've tried playing the stream through an audio element, but that seems to do nothing.
Ironically this is how I managed to get around the issue; by creating an object in the popup itself. When using tabCapture in the popup script, it returns the stream, and I set the audio srcObject to that stream.
HTML:
<audio id="audioObject" autoplay> No source detected </audio>
JS:
chrome.tabCapture.capture({audio: true, video: false}, function(stream) {
var audio = document.getElementById("audioObject");
audio.srcObject = stream
})
According to this post on Manifest V3, chrome.capture will be the new namespace for tabCapture and the like, but I haven't seen anything beyond that.
I had this problem too, and I resolve it by using Web Audio API. Just create a new context and conect it to a media stream source using the captures MediaStream, this is an example:
avoidSilenceInTab: (desktopStream: MediaStream) => {
var contextTab = new AudioContext();
contextTab
.createMediaStreamSource(desktopStream)
.connect(contextTab.destination);
}

Chrome produces no audio after reaching 50 audio output streams

During my testing, I have found out that reaching 50 audio output streams (as displayed in chrome://media-internals/ Audio tab) on a single tab causes the audio output to disappear. Does Chrome have a set maximum limit of audio output streams allowed per displayed tab? If so, is there some workaround for that? The Chrome version that I am using is Version 87.0.4280.141.
Whenever we're muting/unmuting the audio(second function below) and adjusting the mic volume(first function below), we create a new audio context. Does too many audio context instances caused the issue?
private setLocalStreamVolume(stream: MediaStream | undefined) {
const context = new AudioContext()
const destination = context.createMediaStreamDestination()
const gainNode = context.createGain()
if (stream) {
for(const track of stream.getTracks()){
const sourceStream = context.createMediaStreamSource(new MediaStream([track]));
sourceStream.connect(gainNode)
gainNode.connect(destination)
gainNode.gain.value = this._micVolume
}
}
return destination.stream
}
export function mixStreams(streams: Iterable<(MediaStream | undefined)>) {
const context = new AudioContext()
const mixedOutput = context.createMediaStreamDestination()
for(const stream of streams)
if(stream)
for(const track of stream.getTracks()){
const sourceStream = context.createMediaStreamSource(new MediaStream([track]));
sourceStream.connect(mixedOutput);
}
return mixedOutput.stream.getTracks()[0]
}
Does too many audio context interactions caused the issue?
Too many AudioContext instances certainly will. In fact, on some systems you can only use a single AudioContext.
I'm not sure what your specific use case is, but you probably only need one AudioContext. All your MediaStreamSourceNodes can live in the same context.

Play stream from gstreamer in browser

I want to play stream from gstreamer in a web browser.
I played around a with RTP, WebRTC and SDP files but, while VLC was able to connect to stream by simple SDP, browsers were not. I later understood that WebRTC requires secure connection which only complicates things and is not needed for my purposes. I stumbled upon Media Source Extension (MSE) of html5, which seems that it could help, but I'm not able to find some comprehensive tutorial or appropriate specs on how to get gstreamer to stream correct data and later how to play them using MSE. I'm also not sure about latency with using MSE.
So is there a way to play stream from gstreamer in a browser?
Thanks.
Using node webrtc project, I was able to combine output from gstreamer with webrtc call. For gstreamer, there is a project which enables it's use with node gstreamer superficial. So basically, you need to run gstremaer process from node process, which can then control output from gstremaer. On every gstreamer frame there is a callback called which takes the frame and can send it to webrtc calls.
Then an webrtc calls needs to be implemented. There is required some signaling protocol for calls. One side of the call will be the server and another will be the client's browser, instead of two browsers. Then a video track will be created where frames from gstreamer superficial will be pushed.
const { RTCVideoSource } = require("wrtc").nonstandard;
const gstreamer = require("gstreamer-superficial");
const source = new RTCVideoSource();
// This is WebRTC video track which should be used with addTransceiver see below
const track = source.createTrack();
const frame = {
width: 1920,
height: 1080,
data: null
};
const pipeline = new gstreamer.Pipeline("v4l2src ! videorate ! video/x-raw,format=YUY2,width=1920,height=1080,framerate=25/1 ! videoconvert ! video/x-raw,format=I420 ! appsink name=sink");
const appsink = pipeline.findChild("sink");
const pull = function() {
appsink.pull(function(buf, caps) {
if (buf) {
frame.data = new Uint8Array(buf);
try {
source.onFrame(frame);
} catch (e) {}
pull();
} else if (!caps) {
console.log("PULL DROPPED");
setTimeout(pull, 500);
}
});
};
pipeline.play();
pull();
// Example:
const useTrack = SomeRTCPeerConnection => SomeRTCPeerConnection.addTransceiver(track, { direction: "sendonly" });

Chromecast subtitles on default receiver applications

I am trying to include subtitles on a Chromecast application I'm building.
I am using the default receiver application.
I am writing a chrome sender application using v1 of the chrome sender api.
According to the Chromecast Sender Api documentation, I should be passing in an array of track objects into the chrome.cast.media.MediaInfo object. My issue is, whenever I call chrome.cast.media.Track(trackId, trackType), it returns undefined. When I look through the public methods in chrome.cast.media, through console, I don't see anything related to Track. Link to documentation here.
Below is my loadMedia method where I try to include an array of track objects along with my LoadRequest as specified by the cast api. The commented out code is how I've seen closed-captioning handled in one of the cast Github repositories, but unfortunately I believe you have to handle that customData in your own custom receiver application.
Are subtitles through the chrome sender SDK possible yet, or does one have to build their own receiver application and specifically handle text tracking through passed in customData? Am I potentially using the wrong sender api?
function loadMedia() {
mediaUrl = decodeURIComponent(_player.sources.mp4);
var mediaInfo = new chrome.cast.media.MediaInfo(mediaUrl);
mediaInfo.contentType = 'video/mp4';
var track1 = new chrome.cast.media.Track(1, chrome.cast.media.TrackType.TEXT);
track1.trackContentId = "https://dl.dropboxusercontent.com/u/35106650/test.vtt";
mediaInfo.tracks = [track1];
var request = new chrome.cast.media.LoadRequest(mediaInfo);
// var json = {
// cc: {
// tracks: [{
// src: "https://dl.dropboxusercontent.com/u/35106650/test.vtt"
// }],
// active: 0
// }
// };
// request.customData = json;
session.loadMedia(request, onMediaDiscovered.bind(this, 'loadMedia'), onMediaError);
}
Currently, neither the Default nor the Styled Receivers support Closed Caption; you need to create your own. We have a sample in our GitHub repo that can be used for doing exactly that.
Update: Styled and Default receivers now support Tracks, see our documentations.

Web Audio API: How to load another audio file?

I want to write a basic script for HTML5 Web Audio API, can play some audio files. But I don't know how to unload a playing audio and load another one. In my script two audio files are playing in the same time,but not what I wanted.
Here is my code:
var context,
soundSource,
soundBuffer;
// Step 1 - Initialise the Audio Context
context = new webkitAudioContext();
// Step 2: Load our Sound using XHR
function playSound(url) {
// Note: this loads asynchronously
var request = new XMLHttpRequest();
request.open("GET", url, true);
request.responseType = "arraybuffer";
// Our asynchronous callback
request.onload = function() {
var audioData = request.response;
audioGraph(audioData);
};
request.send();
}
// This is the code we are interested in
function audioGraph(audioData) {
// create a sound source
soundSource = context.createBufferSource();
// The Audio Context handles creating source buffers from raw binary
soundBuffer = context.createBuffer(audioData, true/* make mono */);
// Add the buffered data to our object
soundSource.buffer = soundBuffer;
// Plug the cable from one thing to the other
soundSource.connect(context.destination);
// Finally
soundSource.noteOn(context.currentTime);
}
// Stop all of sounds
function stopSounds(){
// How can do this?
}
// Events for audio buttons
document.querySelector('.pre').addEventListener('click',
function () {
stopSounds();
playSound('http://thelab.thingsinjars.com/web-audio-tutorial/hello.mp3');
}
);
document.querySelector('.next').addEventListener('click',
function() {
stopSounds();
playSound('http://thelab.thingsinjars.com/web-audio-tutorial/nokia.mp3');
}
);
You should be pre-loading sounds into buffers once, at launch, and simply resetting the AudioBufferSourceNode whenever you want to play it back.
To play multiple sounds in sequence, you need to schedule them using noteOn(time), one after the other, based on buffer respective lengths.
To stop sounds, use noteOff.
Sounds like you are missing some fundamental web audio concepts. This (and more) is described in detail and shown with samples in this HTML5Rocks tutorial and the FAQ.