Is there any way to retrieve a page's JavaScript variables from a Google Chrome content script?
If you really need to, you can insert a <script>
element into the page's DOM; the code inside your <script>
element will be executed and that code will have access to JavaScript variables at the scope of the window. You can then communicate them back to the content script using data-
attributes and firing custom events.
Sound awkward? Why yes, it is, and intentionally so for all the reasons in the documentation that serg has cited. But if you really, really need to do it, it can be done. See here and here for more info. And good luck!
Chrome's documentation gives you a good starting point: https://developer.chrome.com/extensions/content_scripts#host-page-communication
This method allows you to extract a global page variable to your content script. It also uses an idea to only accept incoming messages that you recognize given your handshake. You can also just use Math.random()
for the handshake but I was having some fun.
The script below stringifies the function propagateVariable
and passes the current handShake and targeted variable name into the string for preservation, since the function will not have access to our content script scope.
const globalToExtract = 'someVariableName';
// Handshake token: five random 32-bit unsigned integers, rendered as a
// comma-separated string. It tags the messages this script should accept.
const array = new Uint32Array(5);
window.crypto.getRandomValues(array); // fills `array` in place
const handShake = array.toString();
/**
 * Posts the value of a window-level variable, tagged with a handshake token,
 * to this window via postMessage.
 *
 * The function is serialised with toString() and run from an injected
 * <script>, so it must stay self-contained and only use its own parameters.
 *
 * @param {string} handShake - token echoed back so the receiver can recognise the message
 * @param {string} variableName - name of the window property to send
 */
function propagateVariable(handShake, variableName) {
  const payload = {};
  payload.handShake = handShake;
  payload[variableName] = window[variableName];
  window.postMessage(payload, "*");
}
// Injects a <script> whose body is a call to the stringified propagateVariable,
// so that the call runs with access to the page's own window variables.
(function injectPropagator() {
  // JSON.stringify produces correctly escaped JavaScript string literals, so a
  // quote or backslash in either argument cannot break the generated call.
  const callArgs = [handShake, globalToExtract]
    .map((arg) => JSON.stringify(arg))
    .join(', ');
  const script = `( ${propagateVariable.toString()} )(${callArgs});`;

  const scriptTag = document.createElement('script');
  scriptTag.id = 'chromeExtensionDataPropagator';
  scriptTag.appendChild(document.createTextNode(script));

  // document.body is null until <body> has been parsed; fall back to <html>.
  (document.body || document.documentElement).append(scriptTag);
})();
// Receives every "message" event on this window and keeps only the ones
// carrying our handshake token.
window.addEventListener("message", function({data}) {
  console.log("INCOMINGGGG!", data);
  // We only accept messages from ourselves. Other scripts may post null or
  // primitives, so guard before reading a property off the payload.
  if (!data || data.handShake !== handShake) return;
  console.log("Content script received: ", data);
}, false);
/**
 * Reads a window-level variable of the host page from a content script.
 *
 * A <script> element is injected that posts the variable back through
 * window.postMessage, tagged with a random handshake so unrelated messages
 * are ignored.
 *
 * @param {string} variableName - name of the page's window property to read
 * @returns {Promise<Object>} resolves with the posted message object, i.e.
 *   `{ handShake, [variableName]: value }`; stays pending if no matching
 *   message ever arrives
 */
function extractGlobal(variableName) {
  const array = new Uint32Array(5);
  const handShake = window.crypto.getRandomValues(array).toString();

  // Serialised with toString() and executed by the injected script, so it must
  // be self-contained and may only use its own parameters.
  function propagateVariable(handShake, variableName) {
    const message = { handShake };
    message[variableName] = window[variableName];
    window.postMessage(message, "*");
  }

  (function injectPropagator() {
    // JSON.stringify yields escaped JS string literals, so quotes or
    // backslashes in the variable name cannot break the generated call.
    const script = `( ${propagateVariable.toString()} )(${JSON.stringify(handShake)}, ${JSON.stringify(variableName)});`;
    const scriptTag = document.createElement('script');
    scriptTag.id = 'chromeExtensionDataPropagator';
    scriptTag.appendChild(document.createTextNode(script));
    // document.body is null until <body> has been parsed; fall back to <html>.
    (document.body || document.documentElement).append(scriptTag);
    // The inline script has already run; removing the tag avoids piling up
    // elements with the same id on repeated calls.
    scriptTag.remove();
  })();

  return new Promise((resolve) => {
    function onMessage({ data }) {
      // We only accept messages from ourselves; other scripts may post
      // null or primitives, so guard before reading the property.
      if (!data || data.handShake !== handShake) return;
      // One answer per call: detach so listeners do not accumulate.
      window.removeEventListener("message", onMessage, false);
      resolve(data);
    }
    window.addEventListener("message", onMessage, false);
  });
}
// Example: read the page global named "someVariableName"; the handler receives
// the message object that extractGlobal resolves with.
extractGlobal('someVariableName').then(function (data) {
  // Do Work Here
});
I would recommend tossing the class into its own file and exporting it as a default if using es modules. Then it simply becomes:
ExtractPageVariable('someGlobalPageVariable').data.then(pageVar => {
// Do work here
I created a little helper method, have fun :)
to retrieve the window's variables "lannister", "always", "pays", "his", "debts", you execute the following:
// Example: fetch five page globals in one call, then print two of them.
var windowVariables = retrieveWindowVariables([
  "lannister",
  "always",
  "pays",
  "his",
  "debts"
]);
console.log(windowVariables["lannister"]);
console.log(windowVariables["always"]);
my code:
/**
 * Reads several window-level variables of the host page in one go.
 *
 * An injected <script> copies each defined, non-null variable into a temporary
 * `tmp_<name>` attribute on the <html> element; the attributes are then read
 * back and removed, together with the script element.
 *
 * Only native DOM APIs are used, so neither the page nor the content script
 * needs jQuery.
 *
 * NOTE: each name is spliced into the injected code as source text. Only pass
 * trusted, hard-coded identifiers.
 *
 * @param {string[]} variables - names of the page variables to read
 * @returns {Object<string, (string|undefined)>} one key per requested name;
 *   values are the attribute strings (i.e. the variable converted to a string),
 *   or undefined when the variable is undefined or null in the page
 */
function retrieveWindowVariables(variables) {
  // One guarded statement per variable. <html> always exists, whereas
  // document.body can still be null early in page load.
  const scriptContent = variables
    .map((name) =>
      `if (typeof ${name} !== 'undefined' && ${name} !== null) ` +
      `document.documentElement.setAttribute('tmp_${name}', ${name});\n`)
    .join('');

  const script = document.createElement('script');
  script.id = 'tmpScript';
  script.appendChild(document.createTextNode(scriptContent));
  // Inline scripts execute synchronously on insertion, so the attributes are
  // in place as soon as appendChild returns.
  (document.body || document.head || document.documentElement).appendChild(script);

  const ret = {};
  for (const name of variables) {
    const attrName = `tmp_${name}`;
    const value = document.documentElement.getAttribute(attrName);
    // getAttribute reports a missing attribute as null; callers get undefined.
    ret[name] = value === null ? undefined : value;
    document.documentElement.removeAttribute(attrName);
  }

  script.remove();
  return ret;
}
please note that i used jQuery.. you can easily use the native js "removeAttribute" and "removeChild" instead.
As explained partially in other answers, the JS variables from the page are isolated from your Chrome extension content script. Normally, there's no way to access them.
But if you inject a JavaScript tag in the page, you will have access to whichever variables are defined there.
I use a utility function to inject my script in the page:
/**
 * inject - Inject some JavaScript in order to expose JS variables to our content JavaScript.
 *
 * The source is placed in a temporary <script> element, which runs it in the
 * page's context; the element is removed again right after insertion.
 *
 * @param {string} source - the JS source code to execute
 * Example: inject('(' + myFunction.toString() + ')()');
 */
function inject(source) {
  const script = document.createElement('script');
  script.textContent = source;

  const firstScript = document.getElementsByTagName('script')[0];
  if (firstScript) {
    firstScript.parentNode.insertBefore(script, firstScript);
  } else {
    // A page without any <script> element has no anchor to insert before.
    (document.head || document.documentElement).appendChild(script);
  }

  // Inline scripts run synchronously on insertion, so the element is no longer
  // needed. remove() is a no-op if the script already detached itself.
  script.remove();
}
Then you can do:
/**
 * Copies a window-level variable into a `data-<name>` attribute on <body>.
 *
 * Meant to be serialised with toString() and run through inject(), so it must
 * stay self-contained. The attribute stores the value converted to a string.
 *
 * @param {string} whichVar - name of the window variable to expose
 */
function getJSvar(whichVar) {
  // Store the variable's value; the name is only used to build the attribute name.
  document.body.setAttribute('data-' + whichVar, window[whichVar]);
}
// Run getJSvar inside the page for "somePageVariable", then read back the
// attribute it wrote on <body>.
inject(`(${getJSvar.toString()})("somePageVariable")`);
var pageVar = document.body.getAttribute('data-somePageVariable');
Note that if the variable is a complex data type (object, array...), you will need to store the value as a JSON string in getJSvar(), and JSON.parse it back in your content script.
I actually worked around it using the localStorage API. Note: to use this, our content script should be able to read the localStorage. In the manifest.json file, just add the "storage" string:
"permissions": [...,"storage"]
The hijack function lives in the content script:
/**
 * Snapshots two page globals (globalVar, globalVar2) into localStorage under
 * the key "ourLocalStorageObject" by running a small function from an injected
 * <script>, then invokes the callback.
 *
 * @param {Function} callback - called with no arguments once the script
 *   element has been inserted and removed again
 */
function hijack(callback) {
  "use strict";

  // Serialised into the injected script below, so it executes with the
  // page's window rather than in the content script.
  function pageSideSnapshot() {
    const snapshot = {
      globalVar: window.globalVar,
      globalVar2: window.globalVar2
    };
    localStorage.setItem("ourLocalStorageObject", JSON.stringify(snapshot));
  }

  const tag = document.createElement('script');
  tag.textContent = `(${pageSideSnapshot})()`;

  const mountPoint = document.head || document.documentElement;
  mountPoint.appendChild(tag);
  tag.parentNode.removeChild(tag);

  callback();
}
Now we can call from the contentscript
// Run hijack once the DOM is ready. A content script may start after
// DOMContentLoaded has already fired, in which case a listener registered now
// would never be called, so check the current state first.
if (document.readyState === "loading") {
  document.addEventListener("DOMContentLoaded", function() {
    hijack(callback);
  });
} else {
  hijack(callback);
}
or if you use jQuery in your contentscript, like I do:
// jQuery variant: call hijack from the document-ready handler.
$(document).ready(() => {
  hijack(callback);
});
to extract the content:
/**
 * Reads the JSON stored under "ourLocalStorageObject" in localStorage, logs
 * the parsed value, and then removes the entry.
 */
function callback() {
  const STORAGE_KEY = "ourLocalStorageObject";
  const ourLocalStorageObject = JSON.parse(localStorage.getItem(STORAGE_KEY));
  console.log("I can see now on content script", ourLocalStorageObject);
  // Optional cleanup: drop the temporary entry.
  localStorage.removeItem(STORAGE_KEY);
}
This can be called multiple times, so if your page changes elements or internal code, you can add event listeners to update your extension with the new data.
Edit: I've added callbacks so you can be sure your data won't be invalid (had this issue myself)
This is way late but I just had the same requirement & created a simple standalone class to make getting variable values (or calling functions on objects in the page) really really easy. I used pieces from other answers on this page, which were very useful.
The way it works is to inject a script tag into the page which accesses the variable you want, then it creates a div to hold the serialised version of the value as innerText. It then reads & deserialises this value, deletes the div and script elements it injected, so the dom is back to exactly what it was before.
/**
 * Helper for reading a page value from a content script.
 *
 * find() creates a scratch <div>, injects a <script> that writes
 * JSON.stringify(<expression>) into that div, parses the text back, and then
 * removes both injected elements again.
 *
 * NOTE: the identifier passed to find() is spliced into the injected script as
 * source code. Only pass known, hard-coded strings.
 */
var objNativeGetter = {

  // Elements injected by the current find() call, awaiting removal.
  divsToTidyup: [],
  // id of the scratch div that carries the serialised value.
  DIVID: 'someUniqueDivId',

  // Removes every element recorded in divsToTidyup and empties the list.
  _tidyUp: function () {
    console.log(['going to tidy up ', this.divsToTidyup]);
    var el;
    while ((el = this.divsToTidyup.shift()) !== undefined) {
      console.log('removing element with ID : ' + el.getAttribute('id'));
      // Page code may already have detached the element.
      if (el.parentNode) {
        el.parentNode.removeChild(el);
      }
    }
  },

  // create a div to hold the serialised version of what we want to get at
  _createTheDiv: function () {
    var div = document.createElement('div');
    div.setAttribute('id', this.DIVID);
    div.textContent = '';
    document.body.appendChild(div);
    this.divsToTidyup.push(div);
  },

  // Parses the JSON text that the injected script left in the scratch div.
  // textContent is used instead of innerText because innerText returns the
  // text as rendered, so page CSS could alter the JSON.
  _getTheValue: function () {
    return JSON.parse(document.getElementById(this.DIVID).textContent);
  },

  // find the page variable from the stringified version of what you would normally use to look in the symbol table
  // eg. pbjs.adUnits would be sent as the string: 'pbjs.adUnits'
  _findTheVar: function (strIdentifier) {
    var script = document.createElement('script');
    script.setAttribute('id', 'scrUnique');
    script.textContent = "\nconsole.log(['going to stringify the data into a div...', JSON.stringify(" + strIdentifier + ")]);\ndocument.getElementById('" + this.DIVID + "').textContent = JSON.stringify(" + strIdentifier + ");\n";
    (document.head||document.documentElement).appendChild(script);
    this.divsToTidyup.push(script);
  },

  // this is the only call you need to make eg.:
  // var val = objNativeGetter.find('someObject.someValue');
  // sendResponse({theValueYouWant: val});
  //
  // Returns the JSON-decoded value. Throws whatever JSON.parse throws when the
  // injected script produced no valid JSON (for example because the
  // expression failed in the page).
  find: function(strIdentifier) {
    try {
      this._createTheDiv();
      this._findTheVar(strIdentifier);
      return this._getTheValue();
    } finally {
      // Always restore the DOM, even when parsing fails; otherwise the div and
      // script would stay behind and the next call would add duplicate ids.
      this._tidyUp();
    }
  }
};
You use it like this:
// Usage sketch (not runnable as written: the object body below is a
// placeholder for the objNativeGetter definition). A chrome.runtime.onMessage
// listener looks up a page value and sends it back through sendResponse.
chrome.runtime.onMessage.addListener(
function(request, sender, sendResponse) {
// Placeholder: paste the objNativeGetter object literal here.
var objNativeGetter = {
.... the object code, above
}
// do some validation, then carefully call objNativeGetter.find(...) with a known string (don't use any user generated or dynamic string - keep tight control over this)
// The string is evaluated as an expression in the page, so a method call works too.
var val = objNativeGetter.find('somePageObj.someMethod()');
// Reply to the sender with the retrieved value.
sendResponse({theValueYouWant: val});
}
);