I want to monitor a page's network traffic and get the URLs of all JavaScript network requests, similar to what PhantomJS's page.onResourceRequested
does, but I couldn't figure out how to do this with Google Chrome's Puppeteer.
I've been dabbling with Google Chrome's Puppeteer, but I couldn't figure out how to make it work, as the output I get looks like this:
Page {
domain: null,
_events: {
request: [Function]
},
_eventsCount: 1,
_maxListeners: undefined,
_client: Session {
domain: null,
_events: {
'Page.frameAttached': [Function],
'Page.frameNavigated': [Function],
'Page.frameDetached': [Function],
'Runtime.executionContextCreated': [Function],
'Network.requestWillBeSent': [Function: bound _onRequestWillBeSent],
'Network.requestIntercepted': [Function: bound _onRequestIntercepted],
'Network.responseReceived': [Function: bound _onResponseReceived],
'Network.loadingFinished': [Function: bound _onLoadingFinished],
'Network.loadingFailed': [Function: bound _onLoadingFailed],
'Page.loadEventFired': [Function],
'Runtime.consoleAPICalled': [Function],
'Page.javascriptDialogOpening': [Function],
'Runtime.exceptionThrown': [Function],
'Security.certificateError': [Function],
'Inspector.targetCrashed': [Function]
},
_eventsCount: 15,
_maxListeners: undefined,
_lastId: 14,
_callbacks: Map {},
_connection: Connection {
domain: null,
_events: {},
_eventsCount: 0,
_maxListeners: undefined,
_url: 'ws://127.0.0.1:65343/devtools/browser/ca214df4-4357-4b8f-8552-a1524d6652ff',
_lastId: 17,
_callbacks: Map {},
_delay: 0,
_ws: [Object],
_sessions: [Object]
},
_targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
_sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
},
_keyboard: Keyboard {
_client: Session {
domain: null,
_events: [Object],
_eventsCount: 15,
_maxListeners: undefined,
_lastId: 14,
_callbacks: Map {},
_connection: [Object],
_targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
_sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
},
_modifiers: 0,
_pressedKeys: Set {}
},
_mouse: Mouse {
_client: Session {
domain: null,
_events: [Object],
_eventsCount: 15,
_maxListeners: undefined,
_lastId: 14,
_callbacks: Map {},
_connection: [Object],
_targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
_sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
},
_keyboard: Keyboard {
_client: [Object],
_modifiers: 0,
_pressedKeys: Set {}
},
_x: 0,
_y: 0,
_button: 'none'
},
_frameManager: FrameManager {
domain: null,
_events: {
frameattached: [Function],
framedetached: [Function],
framenavigated: [Function]
},
_eventsCount: 3,
_maxListeners: undefined,
_client: Session {
domain: null,
_events: [Object],
_eventsCount: 15,
_maxListeners: undefined,
_lastId: 14,
_callbacks: Map {},
_connection: [Object],
_targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
_sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
},
_mouse: Mouse {
_client: [Object],
_keyboard: [Object],
_x: 0,
_y: 0,
_button: 'none'
},
_frames: Map {
'232.1' => [Object]
},
_mainFrame: Frame {
_client: [Object],
_mouse: [Object],
_parentFrame: null,
_url: 'http://mytestdomain.com/',
_id: '232.1',
_defaultContextId: 4,
_waitTasks: Set {},
_childFrames: Set {},
_name: undefined,
_loadingFailed: false
}
},
_networkManager: NetworkManager {
domain: null,
_events: {
request: [Function],
response: [Function],
requestfailed: [Function],
requestfinished: [Function]
},
_eventsCount: 4,
_maxListeners: undefined,
_client: Session {
domain: null,
_events: [Object],
_eventsCount: 15,
_maxListeners: undefined,
_lastId: 14,
_callbacks: Map {},
_connection: [Object],
_targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
_sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
},
_requestIdToRequest: Map {},
_interceptionIdToRequest: Map {
null => [Object], 'id-1' => [Object], 'id-2' => [Object], 'id-3' => [Object]
},
_extraHTTPHeaders: Map {},
_requestInterceptionEnabled: true,
_requestHashToRequestIds: Multimap {
_map: [Object]
},
_requestHashToInterceptions: Multimap {
_map: Map {}
}
},
_emulationManager: EmulationManager {
_client: Session {
domain: null,
_events: [Object],
_eventsCount: 15,
_maxListeners: undefined,
_lastId: 14,
_callbacks: Map {},
_connection: [Object],
_targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
_sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
},
_emulatingMobile: false,
_injectedTouchScriptId: null
},
_tracing: Tracing {
_client: Session {
domain: null,
_events: [Object],
_eventsCount: 15,
_maxListeners: undefined,
_lastId: 14,
_callbacks: Map {},
_connection: [Object],
_targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
_sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
},
_recording: false,
_path: ''
},
_pageBindings: Map {},
_ignoreHTTPSErrors: false,
_screenshotTaskQueue: TaskQueue {
_chain: Promise {
undefined
}
},
_viewport: {
width: 800,
height: 600
}
}
Can you please tell me how I can get all the URLs of the JavaScript network events with Puppeteer?
Check out the sample that intercepts image requests. It is easy to modify it to look at other types of resource requests:
// Turn on request interception so every outgoing request is passed to the
// 'request' handler below before it is sent.
// Note: in later Puppeteer releases this method is named
// page.setRequestInterception(value).
await page.setRequestInterceptionEnabled(true);
page.on('request', request => {
  // Older Puppeteer releases expose `url` as a property, newer ones as a
  // method; accept both so the check below always sees a string.
  const rawUrl = typeof request.url === 'function' ? request.url() : request.url;
  // Drop any query string or fragment so that "app.js?v=2" and "app.js#x"
  // are still recognised as JavaScript resources.
  const [urlWithoutQuery] = String(rawUrl).split(/[?#]/, 1);
  if (/\.js$/i.test(urlWithoutQuery)) {
    // request for js resource
  }
  // Every intercepted request must be resumed, otherwise the page load stalls.
  request.continue();
});
await page.goto('https://example.com');
I came across this post; note that setRequestInterceptionEnabled has since been renamed to
page.setRequestInterception(value)
Here is a piece of code I found in the documentation:
const puppeteer = require('puppeteer');

// Launch a browser, block .png/.jpg requests while loading the page, and
// always shut the browser down again, even when navigation fails.
puppeteer.launch().then(async browser => {
  try {
    const page = await browser.newPage();
    await page.setRequestInterception(true);
    page.on('request', interceptedRequest => {
      // Older Puppeteer releases expose `url` as a property, newer ones as a
      // method; accept both so endsWith() is always called on a string.
      const requestUrl = typeof interceptedRequest.url === 'function'
        ? interceptedRequest.url()
        : interceptedRequest.url;
      if (requestUrl.endsWith('.png') || requestUrl.endsWith('.jpg')) {
        interceptedRequest.abort();
      } else {
        interceptedRequest.continue();
      }
    });
    await page.goto('https://example.com');
  } finally {
    // Close the browser on success and on failure so no process is left behind.
    await browser.close();
  }
}).catch(error => {
  // Report launch/navigation failures instead of leaving the rejection unhandled.
  console.error(error);
  process.exitCode = 1;
});
NOTE: Enabling request interception disables page caching.
Here is the URL for the puppeteer Documentation: Puppeteer Documentation
来源:https://stackoverflow.com/questions/45941601/how-to-inspect-network-traffic-and-get-the-url-of-resource-requests