Plotting similarity matrix using Networkx

|▌冷眼眸甩不掉的悲伤 提交于 2021-02-09 07:09:10

问题


I am trying to visualize correlations (similarity scores up to 1) between words using networkx.

For example similarity scores between dog, cat, animal, person, wolf

I've tried using this code to plot similarity distances between each word/node:

import networkx as nx
import matplotlib.pyplot as plt
# Undirected graph that the builder functions below populate.
G = nx.Graph()

# Pairwise similarity scores, one [word_a, word_b, score] row per pair.
# Self-pairs are included and always score 1.0.
corr_data = [
    ['Dog', 'Dog', 1.0],
    ['Cat', 'Dog', 0.8016854524612427],
    ['Wolf', 'Dog', 0.5206573009490967],
    ['Person', 'Dog', 0.3756750822067261],
    ['Animal', 'Dog', 0.6618534326553345],
    ['Cat', 'Cat', 1.0],
    ['Wolf', 'Cat', 0.5081626176834106],
    ['Person', 'Cat', 0.32475101947784424],
    ['Animal', 'Cat', 0.6260400414466858],
    ['Wolf', 'Wolf', 1.0],
    ['Person', 'Wolf', 0.23091702163219452],
    ['Animal', 'Wolf', 0.5261368751525879],
    ['Person', 'Person', 1.0],
    ['Animal', 'Person', 0.34220656752586365],
    ['Animal', 'Animal', 1.0],
]

# Keys recorded by build_graph to mark the edges it has already added.
existing_edges = dict()

def build_graph(w, lev):
    """Recursively add the similarity neighbourhood of ``w`` to ``G``.

    Every row of ``corr_data`` that mentions ``w`` in one of its first two
    fields contributes an edge between ``w`` and the other word of that same
    row. The walk then continues from that other word, one level deeper.

    Args:
        w: Word to expand; compared against the first two fields of each row.
        lev: Current recursion depth. Expansion stops once it exceeds 5.
    """
    if lev > 5:
        return
    for z in corr_data:
        # Find which column holds ``w``; the neighbour is the other column of
        # the current row (not a row of the table itself).
        if z[0] == w:
            other = z[1]
        elif z[1] == w:
            other = z[0]
        else:
            continue

        key = str(w) + "_" + str(other)
        if key in existing_edges:
            continue

        # Record both directions: G is undirected, so the same edge must not
        # be added and expanded again when the walk reaches ``other``.
        existing_edges[key] = 1
        existing_edges[str(other) + "_" + str(w)] = 1
        # add_edge creates any missing endpoint nodes itself.
        G.add_edge(str(w), str(other))

        # A self-pair such as ['Dog', 'Dog', 1.0] has nothing new to expand.
        if other != w:
            build_graph(other, lev + 1)


# NOTE(review): not referenced by any code visible in this listing.
existing_nodes = {}


def build_graph_for_all():
    """Add the rows of ``corr_data`` to ``G`` as plain (unweighted) edges.

    For each row, both words are registered as nodes unless the word is a key
    of ``existing_edges``, and the two words are then joined by an edge. The
    score in the third field is not used. At most 41 rows are processed.
    """
    for handled, row in enumerate(corr_data):
        if handled > 40:
            return
        for column in (0, 1):
            word = row[column]
            if word not in existing_edges:
                G.add_node(str(word))
        G.add_edge(str(row[0]), str(row[1]))


build_graph_for_all()

print(G.nodes(data=True))
# Draw first, save, then show: a plt.show() issued before nx.draw() has
# nothing to display, and the file should be written before the window opens.
plt.figure()
nx.draw(G, width=2, with_labels=True)
plt.savefig("path1.png")
plt.show()


# "design" does not occur in corr_data, so this adds one isolated node.
w = "design"
G.add_node(w)
build_graph(w, 0)

print(G.nodes(data=True))
# Start a new figure so this drawing is not painted over the previous one.
plt.figure()
nx.draw(G, width=2, with_labels=True)
plt.savefig("path.png")
plt.show()

The distances between my nodes look off, i.e. Cat and Person are plotted closer together than Cat and Dog. Am I missing something obvious here?


回答1:


There are a few things you'll need to fix. Most importantly, you can read up on how networkx draws networks using the spring layout. Basically what you need to do is to add the correlation values to your network edges.

  1. You can do this by replacing your G.add_edge line in build_graph_for_all by:

    G.add_weighted_edges_from([[str(d[0]), str(d[1]),d[2]]])     
    
  2. You can plot your network using the following code, which should take the edge weights into account.

    pos = nx.spring_layout(G,weight='weight')

    nx.draw(G,pos=pos, width=2, with_labels=True)

  3. If you really want to emphasize the difference between high and low correlation edges, you can transform the weights as follows:

    corr_data = [[x[0],x[1],1000**(x[2])] for x in corr_data]

This will stretch out your weights - a low correlation of .3 will be mapped to around 8, and a perfect correlation of 1 will be mapped to 1000. Remaking the graph with these new weights and repeating the plotting code above yields the following image:

Edit: Here is the merged code:

import networkx as nx
import matplotlib.pyplot as plt
# Undirected graph that the builder functions below populate.
G = nx.Graph()

# Pairwise similarity scores, one [word_a, word_b, score] row per pair.
# Self-pairs are included and always score 1.0.
corr_data = [
    ['Dog', 'Dog', 1.0],
    ['Cat', 'Dog', 0.8016854524612427],
    ['Wolf', 'Dog', 0.5206573009490967],
    ['Person', 'Dog', 0.3756750822067261],
    ['Animal', 'Dog', 0.6618534326553345],
    ['Cat', 'Cat', 1.0],
    ['Wolf', 'Cat', 0.5081626176834106],
    ['Person', 'Cat', 0.32475101947784424],
    ['Animal', 'Cat', 0.6260400414466858],
    ['Wolf', 'Wolf', 1.0],
    ['Person', 'Wolf', 0.23091702163219452],
    ['Animal', 'Wolf', 0.5261368751525879],
    ['Person', 'Person', 1.0],
    ['Animal', 'Person', 0.34220656752586365],
    ['Animal', 'Animal', 1.0],
]

# Stretch the scores exponentially so strong and weak similarities end up far
# apart as edge weights (1.0 becomes 1000, roughly 0.3 becomes roughly 8).
corr_data = [
    [first, second, 1000 ** score]
    for first, second, score in corr_data
]

# Keys recorded by build_graph to mark the edges it has already added.
existing_edges = dict()

def build_graph(w, lev):
    """Recursively add the weighted similarity neighbourhood of ``w`` to ``G``.

    Every row of ``corr_data`` that mentions ``w`` in one of its first two
    fields contributes an edge between ``w`` and the other word of that same
    row, weighted by the row's third field. The walk then continues from that
    other word, one level deeper.

    Args:
        w: Word to expand; compared against the first two fields of each row.
        lev: Current recursion depth. Expansion stops once it exceeds 5.
    """
    if lev > 5:
        return
    for z in corr_data:
        # Find which column holds ``w``; the neighbour is the other column of
        # the current row (not a row of the table itself).
        if z[0] == w:
            other = z[1]
        elif z[1] == w:
            other = z[0]
        else:
            continue

        key = str(w) + "_" + str(other)
        if key in existing_edges:
            continue

        # Record both directions: G is undirected, so the same edge must not
        # be added and expanded again when the walk reaches ``other``.
        existing_edges[key] = 1
        existing_edges[str(other) + "_" + str(w)] = 1
        # Store the score under 'weight' so a weight-aware layout such as
        # nx.spring_layout(G, weight='weight') can use it. add_edge creates
        # any missing endpoint nodes itself.
        G.add_edge(str(w), str(other), weight=z[2])

        # A self-pair such as ['Dog', 'Dog', ...] has nothing new to expand.
        if other != w:
            build_graph(other, lev + 1)


# NOTE(review): not referenced by any code visible in this listing.
existing_nodes = {}


def build_graph_for_all():
    """Add the rows of ``corr_data`` to ``G`` as weighted edges.

    For each row, both words are registered as nodes unless the word is a key
    of ``existing_edges``, and the two words are then joined by an edge whose
    weight is the row's third field. At most 41 rows are processed.
    """
    for handled, row in enumerate(corr_data):
        if handled > 40:
            return
        for column in (0, 1):
            word = row[column]
            if word not in existing_edges:
                G.add_node(str(word))
        weighted_edge = (str(row[0]), str(row[1]), row[2])
        G.add_weighted_edges_from([weighted_edge])


# Fill G with one weighted edge per row of corr_data.
build_graph_for_all()

# Spring layout that reads each edge's 'weight' attribute, then draw the
# labelled graph at those positions and write it to disk.
pos = nx.spring_layout(G, weight='weight')
draw_options = dict(width=2, with_labels=True)
nx.draw(G, pos, **draw_options)

plt.savefig("path1.png")


来源:https://stackoverflow.com/questions/56717750/plotting-similarity-matrix-using-networkx

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!