Realtime visualisation bottleneck with pyqtgraph / PlotCurveItem

我与影子孤独终老i 提交于 2021-01-29 12:32:03


I am currently using pyqtgraph to visualize realtime data for 64 independent data traces/plots. While the speed is relatively good, I noticed a serious slowdown once the sample buffer length goes beyond 2000 points. Profiling the following code shows that arrayToQPath seems to have a major impact:

import numpy
import cProfile
import logging

import pyqtgraph as pg
from PyQt5 import QtCore,uic
from PyQt5.QtGui import *
from PyQt5.QtCore import QRect, QTimer

def program(columns=8, samples=10000, channels=64):
    """Show ``channels`` scrolling sine traces in a grid and run the Qt event loop.

    Args:
        columns: Number of plots per grid row.
        samples: Number of points held in each trace's buffer.
        channels: Number of independent traces/plots.

    Returns:
        The exit code returned by ``app.exec_()``.
    """
    # In PyQt5, QApplication is provided by QtWidgets rather than QtGui.
    from PyQt5.QtWidgets import QApplication

    app = QApplication([])
    win = pg.GraphicsWindow()

    # Column 0 holds the shared x axis; columns 1..channels hold one trace each.
    data = numpy.zeros((samples, channels + 1))
    # Floor division keeps the grid row an integer under Python 3.
    plots = [
        win.addPlot(row=i // columns + 1, col=i % columns)
        for i in range(channels)
    ]
    curves = []

    x = numpy.linspace(0, 1, samples, endpoint=True)
    f = 2  # Frequency in Hz
    A = 1  # Amplitude in Unit
    y = A * numpy.sin(2 * numpy.pi * f * x).reshape((samples, 1))  # Signal

    data[:, 0] = x
    data[:, 1:] = numpy.repeat(y, channels, axis=1)
    for chn_no, p in enumerate(plots, 1):
        c = pg.PlotCurveItem(pen=(chn_no, channels * 1.3))
        # The curve has to be attached to its plot, otherwise nothing is drawn.
        p.addItem(c)
        curves.append((c, chn_no))

    def update():
        # Shift every trace by 100 samples to simulate incoming data, then
        # push the new buffer contents to each curve.
        data[:, 1:] = numpy.roll(data[:, 1:], 100, axis=0)
        for curve, data_index in curves:
            curve.setData(data[:, 0], data[:, data_index])

    # A zero-interval timer calls update() whenever the event loop is idle.
    timer = QTimer()
    timer.timeout.connect(update)
    timer.start(0)
    return app.exec_()

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    # Profile the whole run and list the functions by cumulative time.
    cProfile.run("program()", sort="cumtime")
  ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000  533.660  533.660 {built-in method builtins.exec}
        1    0.053    0.053  533.660  533.660 <string>:1(<module>)
        1    0.018    0.018  533.607  533.607
        1    9.181    9.181  532.209  532.209 {built-in method exec_}
     2709    0.015    0.000  401.728    0.148
     2709   15.572    0.006  401.696    0.148 {paintEvent}
   173376    0.193    0.000  345.725    0.002
   173376    1.599    0.000  345.532    0.002
   173312    0.671    0.000  271.973    0.002
   173312    0.744    0.000  271.153    0.002
   173312  266.888    0.002  270.409    0.002
     2709    5.102    0.002  113.195    0.042
   173440    0.193    0.000  100.616    0.001
   173440    8.718    0.000  100.424    0.001

So almost 1.5 ms per call is spent there. Playing around with arrayToQPath, I noticed that solely the ds >> path line within arrayToQPath seems to consume most of the time (results with that line commented out):

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000  190.847  190.847 {built-in method builtins.exec}
        1    0.050    0.050  190.847  190.847 <string>:1(<module>)
        1    0.017    0.017  190.796  190.796
        1    7.438    7.438  189.395  189.395 {built-in method exec_}
     2221    4.165    0.002   88.497    0.040
     2221    0.010    0.000   86.830    0.039
     2221   11.494    0.005   86.806    0.039 {paintEvent}
   142208    0.152    0.000   77.941    0.001
   142208    4.500    0.000   77.789    0.001

ds is a QtCore.QDataStream and path is a QtGui.QPainterPath. However, the reason why the >> operation takes so much time completely eludes me. So I am looking for a possibility to speed up the rendering, and I would like to stick with pyqtgraph, i.e. not switch to e.g. vispy right now.

The original arrayToQPath:

def arrayToQPath(x, y, connect='all'):
    """Convert arrays of x,y coordinates to a QPainterPath as efficiently as possible.

    Args:
        x: Array of x coordinates; ``x.shape[0]`` is the number of vertices.
        y: Array of y coordinates, one per entry of ``x``.
        connect: ``'all'`` connects each point to the next; ``'pairs'``
            connects each pair of points; ``'finite'`` connects only points
            whose x and y are both finite; an array of int32 values (0 or 1)
            gives the connection flag for every vertex explicitly.

    Returns:
        A ``QtGui.QPainterPath`` containing one element per vertex.

    Raises:
        Exception: If ``connect`` is none of the accepted values.
    """

    ## Create all vertices in path. The method used below creates a binary format so that all
    ## vertices can be read in at once. This binary format may change in future versions of Qt,
    ## so the original (slower) method is left here for emergencies:
        #path.moveTo(x[0], y[0])
        #if connect == 'all':
            #for i in range(1, y.shape[0]):
                #path.lineTo(x[i], y[i])
        #elif connect == 'pairs':
            #for i in range(1, y.shape[0]):
                #if i%2 == 0:
                    #path.lineTo(x[i], y[i])
                #else:
                    #path.moveTo(x[i], y[i])
        #elif isinstance(connect, np.ndarray):
            #for i in range(1, y.shape[0]):
                #if connect[i] == 1:
                    #path.lineTo(x[i], y[i])
                #else:
                    #path.moveTo(x[i], y[i])
        #else:
            #raise Exception('connect argument must be "all", "pairs", or array')

    ## Speed this up using >> operator
    ## Format is:
    ##    numVerts(i4)   0(i4)
    ##    x(f8)   y(f8)   0(i4)    <-- 0 means this vertex does not connect
    ##    x(f8)   y(f8)   1(i4)    <-- 1 means this vertex connects to the previous vertex
    ##    ...
    ##    0(i4)
    ## All values are big endian--pack using struct.pack('>d') or struct.pack('>i')

    path = QtGui.QPainterPath()

    #profiler = debug.Profiler()
    n = x.shape[0]
    # create empty array, pad with extra space on either end
    arr = np.empty(n+2, dtype=[('x', '>f8'), ('y', '>f8'), ('c', '>i4')])
    # write first two integers
    #profiler('allocate empty')
    byteview = arr.view(dtype=np.ubyte)
    byteview[:12] = 0
    # The 8-byte header occupies the tail of the first (padding) record so
    # that it sits directly in front of the first vertex.
    byteview.data[12:20] = struct.pack('>ii', n, 0)
    #profiler('pack header')
    # Fill array with vertex values
    arr[1:-1]['x'] = x
    arr[1:-1]['y'] = y

    # decide which points are connected by lines
    if eq(connect, 'all'):
        arr[1:-1]['c'] = 1
    elif eq(connect, 'pairs'):
        arr[1:-1]['c'][::2] = 1
        arr[1:-1]['c'][1::2] = 0
    elif eq(connect, 'finite'):
        arr[1:-1]['c'] = np.isfinite(x) & np.isfinite(y)
    elif isinstance(connect, np.ndarray):
        arr[1:-1]['c'] = connect
    else:
        raise Exception('connect argument must be "all", "pairs", "finite", or array')

    #profiler('fill array')
    # write last 0
    lastInd = 20*(n+1)
    byteview.data[lastInd:lastInd+4] = struct.pack('>i', 0)
    # create datastream object and stream into path

    ## Avoiding this method because QByteArray(str) leaks memory in PySide
    #buf = QtCore.QByteArray(arr.data[12:lastInd+4])  # I think one unnecessary copy happens here

    # Keep a reference on the path so the buffer outlives the raw-data view.
    path.strn = byteview.data[12:lastInd+4] # make sure data doesn't run away
    try:
        buf = QtCore.QByteArray.fromRawData(path.strn)
    except TypeError:
        # Fall back to a copying constructor when the binding rejects the
        # buffer object.
        buf = QtCore.QByteArray(bytes(path.strn))
    #profiler('create buffer')
    ds = QtCore.QDataStream(buf)

    ds >> path

    return path


Taking a closer look into Qt revealed that the QDataStream >> operator in C++ is comparatively slow. It is so slow that overwriting the positions of the elements inside an existing QtGui.QPainterPath() is faster than creating a new one:

import timeit
import struct
import numpy as np
from PyQt5 import QtGui,QtCore

no_trys = 1000

def test(pass_data, samples = 10000):
    """Time re-streaming a QPainterPath against moving its existing elements.

    Args:
        pass_data: If true, stream the buffer into the path once up front so
            the path already holds elements before the timing starts.
        samples: Number of vertices encoded in the serialized buffer.

    Prints the ``timeit`` totals (``no_trys`` runs each), first for the
    QDataStream variant and then for the setElementPositionAt variant.
    """
    path = QtGui.QPainterPath()

    n = samples
    # create empty array, pad with extra space on either end
    arr = np.zeros(n+2, dtype=[('x', '>f8'), ('y', '>f8'), ('c', '>i4')])
    # write first two integers
    byteview = arr.view(dtype=np.ubyte)
    byteview.data[12:20] = struct.pack('>ii', n, 0)

    # write last 0 (already zero because the array is created with np.zeros)
    lastInd = 20*(n+1)
    # create datastream object and stream into path
    path.strn = byteview.data[12:lastInd+4] # make sure data doesn't run away
    buf = QtCore.QByteArray.fromRawData(path.strn)
    ds = QtCore.QDataStream(buf)

    if pass_data:
        ds >> path

    def func1():
        # Rebuild the whole path from the serialized buffer.
        stream = QtCore.QDataStream(buf)
        stream >> path

    def func2():
        # Originally written as map(path.setElementPositionAt, values), which
        # is lazy in Python 3 and never called the method. Call it explicitly
        # and only for elements that actually exist in the path.
        set_position = path.setElementPositionAt
        for i in range(min(samples, path.elementCount())):
            set_position(i, i, i)

    print(timeit.timeit(func1, number=no_trys))
    print(timeit.timeit(func2, number=no_trys))


results in 1.32 s for the DataStream and 0.9 s for the map(path.setElementPositionAt, values).

Profiling the following C++ snippet results in over 8 s on my machine:

#include <chrono>
#include <cstring>
#include <iostream>
#include <vector>

#include <QtCore/QDataStream>
#include <QtGui/QPainterPath>

int function2(const int samples)
    auto size = 8 + samples * 20 + 4;

    std::vector<char> data(size, 0);

    memcpy(, &samples, 4);

    QByteArray buf(QByteArray::fromRawData(, size));
    QDataStream ds(buf);

    float ret;
    for (int counter = 0; counter < samples; counter++)
        int type = 1;
        double x = 0, y = 0;

        ds >> type >> x >> y;
        ret = type + x + y;
    return ret;

int main()
    const int samples = 10000;
    const int tries = 10000;
    int ret = 0;

    auto start = std::chrono::high_resolution_clock::now();

    for (auto counter = 0; counter < tries; counter++)
        ret += function2(samples);
    auto end = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double> elapsed = end - start;

    std::cout << "done\n";
    std::cout << "Elapsed time: " << elapsed.count() << " s\n";
    std::cout << ret;

    return 0;


The easiest solution is to activate the OpenGL mode, i.e. install the PyOpenGL and PyOpenGL-accelerate modules and enable OpenGL in pyqtgraph. This way the createPath part is skipped completely. I simply added the following block to my application:

    try:
        # Importing OpenGL first makes a missing PyOpenGL installation fail
        # here, before pyqtgraph is switched to OpenGL rendering.
        import OpenGL
        pg.setConfigOption('useOpenGL', True)
        pg.setConfigOption('enableExperimental', True)
    except Exception as e:
        print(f"Enabling OpenGL failed with {e}. Will result in slow rendering. Try installing PyOpenGL.")

With that my PC can draw 64 traces with 30000 datapoints without breaking a sweat.

