Insert matplotlib images into a pandas dataframe

冷暖自知 提交于 2019-12-28 16:13:03

问题


PURPOSE: I am currently working with rdkit to colour the structures of my molecules according to rdkit.Chem.Draw.SimilarityMaps. Now, I would like to take the matplotlib images produced by the SimilarityMaps function, insert them into a pandas dataframe, and export this table as an HTML file.

CODE: I tried to do that with the following code

import pandas as pd
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import SimilarityMaps
from rdkit.Chem.Draw import IPythonConsole #Needed to show molecules
from rdkit.Chem.Draw.MolDrawing import MolDrawing, DrawingOptions

# Example input: one SMILES string per row, stored in the 'smiles' column.
df = pd.DataFrame({'smiles':['Nc1nc(NC2CC2)c3ncn([C@@H]4C[C@H](CO)C=C4)c3n1','CCCC(=O)Nc1ccc(OCC(O)CNC(C)C)c(c1)C(C)=O','CCN(CC)CCNC(=O)C1=CC=C(C=C1)NC(=O)C','CC(=O)NC1=CC=C(C=C1)O','CC(=O)Nc1sc(nn1)[S](N)(=O)=O']})

def getSim(smi):
    """Return the similarity-map figure of a molecule against benzene.

    Parameters
    ----------
    smi : str
        SMILES string of the molecule to draw.

    Returns
    -------
    The figure returned by ``SimilarityMaps.GetSimilarityMapForFingerprint``;
    the maximum weight returned alongside it is discarded.
    """
    mol = Chem.MolFromSmiles(smi)
    # Benzene is the fixed reference structure every molecule is compared to.
    refmol = Chem.MolFromSmiles('c1ccccc1')
    # The fingerprints are computed by the similarity-map routine itself via
    # the fingerprint function passed in, so no separate fingerprint of
    # ``mol`` needs to be built beforehand.
    fig, _maxweight = SimilarityMaps.GetSimilarityMapForFingerprint(
        refmol, mol, SimilarityMaps.GetMorganFingerprint)
    return fig

# Build one similarity-map figure per SMILES string and store it in 'map'.
df['map'] = df['smiles'].map(getSim)
# NOTE: to_html writes the text representation of each Figure object
# (e.g. "Figure (200x200)") into the cell, not an image.
df.to_html('/.../test.html')

When I open the file test.html, the map column contains the text "Figure (200x200)". I checked that the map column of my dataframe contains figure objects: this is fine in Python, but not in the HTML file.

QUESTION: I'm not sure how to get a dataframe with images and I'd like to have the help of the community to clarify this subject.

Thanks in advance


回答1:


What you see as Figure (200x200) is the __repr__ string of the matplotlib Figure class. It is the text representation of that python object (the same that you would see when doing print(fig)).

What you want instead is to have an actual image in the table. An easy option would be to save the matplotlib figure as a png image, create an html tag for it, <img src="some.png" />, and put that tag into the table.

import pandas as pd
import numpy as np;np.random.seed(1)
import matplotlib.pyplot as plt
import matplotlib.colors

# Demo table: ten random integers in [0, 10) as 'info' and ten random
# integers in [0, 3) as 'status'.
df = pd.DataFrame({"info" : np.random.randint(0,10,10), 
                   "status" : np.random.randint(0,3,10)})

# Three-colour map; it is indexed below with the integer status value.
cmap = matplotlib.colors.ListedColormap(["crimson","orange","limegreen"])

def createFigure(i):
    """Draw a small coloured disc labelled with ``i`` and return the figure.

    The disc colour is ``cmap(i)``. The figure measures 0.4 x 0.4 inches,
    the axes fill the whole canvas and the axis decorations are hidden.
    """
    figure, axes = plt.subplots(figsize=(.4, .4))
    # Let the axes occupy the full figure area (no margins).
    figure.subplots_adjust(left=0, bottom=0, right=1, top=1)
    axes.axis("off")
    axes.axis([0, 1, 0, 1])
    disc = plt.Circle((.5, .5), .4, color=cmap(i))
    axes.add_patch(disc)
    # Centre the label on the disc.
    axes.text(.5, .5, str(i), ha="center", va="center")
    return figure

def mapping(i):
    """Save the figure for ``i`` as a PNG file and return an ``<img>`` tag.

    Parameters
    ----------
    i : value passed on to ``createFigure`` and used in the file name.

    Returns
    -------
    str
        HTML ``<img>`` tag whose ``src`` is the relative path of the
        written file, ``data/map_<i>.png``.
    """
    import os  # local import: only needed to create the output directory

    fname = "data/map_{}.png".format(i)
    # savefig does not create missing directories, so make sure it exists.
    os.makedirs(os.path.dirname(fname), exist_ok=True)
    fig = createFigure(i)
    try:
        fig.savefig(fname)
    finally:
        # pyplot keeps every figure it created alive until it is closed;
        # without this, one figure per table row stays in memory.
        plt.close(fig)
    imgstr = '<img src="{}" /> '.format(fname)
    return imgstr


# Replace each status value by an <img> tag pointing at its saved PNG.
df['image'] = df['status'].map(mapping)
# escape=False writes the <img> tags as raw HTML instead of escaping
# the angle brackets.
df.to_html('test.html', escape=False)

The drawback of this is that you have a lot of images saved somewhere on disk. If this is not desired, you may store the image encoded as base64 in the html file, <img src="data:image/png;base64,iVBORw0KGgoAAAAN..." />.

import pandas as pd
import numpy as np;np.random.seed(1)
import matplotlib.pyplot as plt
import matplotlib.colors
from io import BytesIO
import base64

# Demo table: ten random integers in [0, 10) as 'info' and ten random
# integers in [0, 3) as 'status'.
df = pd.DataFrame({"info" : np.random.randint(0,10,10), 
                   "status" : np.random.randint(0,3,10)})

# Three-colour map; it is indexed below with the integer status value.
cmap = matplotlib.colors.ListedColormap(["crimson","orange","limegreen"])

def createFigure(i):
    """Return a tiny figure showing a disc coloured by ``cmap(i)``.

    The figure is 0.4 x 0.4 inches, its single axes covers the whole canvas
    with hidden decorations, and ``str(i)`` is printed in the disc centre.
    """
    figure, axes = plt.subplots(figsize=(.4, .4))
    # Stretch the axes over the entire figure.
    figure.subplots_adjust(left=0, bottom=0, right=1, top=1)
    axes.axis("off")
    axes.axis([0, 1, 0, 1])
    disc = plt.Circle((.5, .5), .4, color=cmap(i))
    axes.add_patch(disc)
    axes.text(.5, .5, str(i), ha="center", va="center")
    return figure

def fig2inlinehtml(fig,i):
    """Return an ``<img>`` tag that embeds ``fig`` as a base64-encoded PNG.

    Parameters
    ----------
    fig : object providing ``savefig(file, format=...)``
        The figure to render.
    i : unused
        Kept so that existing callers passing two arguments keep working.

    Returns
    -------
    str
        ``<img src="data:image/png;base64,..." />`` with the PNG data inline.
    """
    figfile = BytesIO()
    fig.savefig(figfile, format='png')
    # b64encode returns bytes on Python 3; decode to text, otherwise
    # str.format would embed the "b'...'" repr and break the data URI.
    figdata_png = base64.b64encode(figfile.getvalue()).decode('ascii')
    imgstr = '<img src="data:image/png;base64,{}" />'.format(figdata_png)
    return imgstr

def mapping(i):
    """Return the inline ``<img>`` HTML for the figure belonging to ``i``.

    Builds the figure with ``createFigure``, converts it with
    ``fig2inlinehtml`` and closes the figure afterwards.
    """
    fig = createFigure(i)
    try:
        return fig2inlinehtml(fig,i)
    finally:
        # pyplot keeps every figure it created alive until it is closed;
        # without this, one figure per formatted cell stays in memory.
        plt.close(fig)


# The base64 strings are far longer than the default column width, so the
# width limit must be lifted or the <img> tags get truncated. None means
# "no limit"; the older sentinel -1 is deprecated in current pandas.
with pd.option_context('display.max_colwidth', None):
    # escape=False writes the <img> tags as raw HTML; the formatter turns
    # each 'status' value into its inline image.
    df.to_html('test.html', escape=False, formatters=dict(status=mapping))

The output looks the same, but there are no images saved to disk.

This also works nicely in a Jupyter Notebook, with a small modification,

from IPython.display import HTML
# ...
# None means "no limit", so the long base64 <img> strings are not truncated;
# the older sentinel -1 is deprecated in current pandas.
pd.set_option('display.max_colwidth', None)
HTML(df.to_html(escape=False, formatters=dict(status=mapping)))



来源:https://stackoverflow.com/questions/47038538/insert-matplotlib-images-into-a-pandas-dataframe

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!