问题
I am trying to visualize correlations (similarity scores up to 1) between words using networkx.
For example, the similarity scores between dog, cat, animal, person, and wolf.
I've tried using this code to plot the similarity distances between each word/node:
import networkx as nx
import matplotlib.pyplot as plt
# Undirected graph that the builder functions below populate.
G = nx.Graph()

# Pairwise similarity table: each row is [word_a, word_b, score].
# Rows pairing a word with itself carry a score of 1.0.
corr_data = [
    ["Dog", "Dog", 1.0],
    ["Cat", "Dog", 0.8016854524612427],
    ["Wolf", "Dog", 0.5206573009490967],
    ["Person", "Dog", 0.3756750822067261],
    ["Animal", "Dog", 0.6618534326553345],
    ["Cat", "Cat", 1.0],
    ["Wolf", "Cat", 0.5081626176834106],
    ["Person", "Cat", 0.32475101947784424],
    ["Animal", "Cat", 0.6260400414466858],
    ["Wolf", "Wolf", 1.0],
    ["Person", "Wolf", 0.23091702163219452],
    ["Animal", "Wolf", 0.5261368751525879],
    ["Person", "Person", 1.0],
    ["Animal", "Person", 0.34220656752586365],
    ["Animal", "Animal", 1.0],
]
# Keys are "<word>_<partner>" strings for every pair build_graph has expanded.
existing_edges = {}


def build_graph(w, lev):
    """Recursively add to ``G`` the edges reachable from the word *w*.

    Every row of ``corr_data`` that has *w* in its first or second column
    contributes an edge between *w* and the word in the other column; the
    function then recurses on that other word. Each (word, partner) pair is
    recorded in ``existing_edges`` so it is expanded at most once.

    Args:
        w: The word whose rows are expanded.
        lev: Current recursion depth; calls with ``lev > 5`` return
            immediately.
    """
    if lev > 5:
        return
    for z in corr_data:
        # Pick the partner from the matched row itself, whichever column
        # *w* was found in.
        if z[0] == w:
            other = z[1]
        elif z[1] == w:
            other = z[0]
        else:
            continue
        edge_key = str(w) + "_" + str(other)
        if edge_key in existing_edges:
            continue
        existing_edges[edge_key] = 1
        G.add_node(str(w))
        G.add_edge(str(w), str(other))
        build_graph(other, lev + 1)
# Names of the nodes build_graph_for_all has already added to G.
existing_nodes = {}


def build_graph_for_all(max_rows=41):
    """Add one node per word and one edge per row of ``corr_data`` to ``G``.

    Args:
        max_rows: Number of leading rows of ``corr_data`` to process. The
            default of 41 keeps the cap this function has always applied;
            pass ``None`` to process every row.
    """
    for d in corr_data[:max_rows]:
        first, second = str(d[0]), str(d[1])
        for name in (first, second):
            # Track nodes in existing_nodes; existing_edges holds
            # "<word>_<partner>" keys and never matches a bare word.
            if name not in existing_nodes:
                existing_nodes[name] = 1
                G.add_node(name)
        G.add_edge(first, second)
# First figure: the graph built from every row of corr_data.
build_graph_for_all()
print(G.nodes(data=True))
# Draw, then save, then show. Calling plt.show() before nx.draw() has
# nothing to display, and a fresh figure keeps this drawing separate from
# any other drawing made in the same session.
plt.figure()
nx.draw(G, width=2, with_labels=True)
plt.savefig("path1.png")
plt.show()

# Second figure: the same graph after expanding from a single start word.
# NOTE: "design" is not one of the words in corr_data, so build_graph finds
# no rows for it and it is only added as an isolated node.
w = "design"
G.add_node(w)
build_graph(w, 0)
print(G.nodes(data=True))
plt.figure()
nx.draw(G, width=2, with_labels=True)
plt.savefig("path.png")
plt.show()
The distances between my nodes look off, e.g. Cat and Person are plotted closer together than Cat and Dog. Am I missing something obvious here?
回答1:
There are a few things you'll need to fix. Most importantly, you can read up on how networkx draws networks using the spring layout. Basically what you need to do is to add the correlation values to your network edges.
You can do this by replacing your G.add_edge line in build_graph_for_all with:
# Store the row's score on the edge under the "weight" attribute.
G.add_edge(str(d[0]), str(d[1]), weight=d[2])
You can plot your network using the following code, which should take the edge weights into account.
# Compute node positions from the "weight" edge attribute, then draw the
# graph using those positions.
pos = nx.spring_layout(G, weight="weight")
nx.draw(G, pos, with_labels=True, width=2)
If you really want to emphasize the difference between high and low correlation edges, you can transform the weights as follows:
# Replace each score s with 1000 ** s, keeping both words of the row unchanged.
corr_data = [[x[0], x[1], 1000 ** x[2]] for x in corr_data]
This will stretch out your weights - a low correlation of .3 will be mapped to around 8, and a perfect correlation of 1 will be mapped to 1000. Remaking the graph with these new weights and repeating the plotting code above yields the following image:
Edit: Here is the merged code:
import networkx as nx
import matplotlib.pyplot as plt
# Undirected graph that the builder functions below populate.
G = nx.Graph()

# Pairwise similarity table: each row is [word_a, word_b, score].
corr_data = [
    ["Dog", "Dog", 1.0],
    ["Cat", "Dog", 0.8016854524612427],
    ["Wolf", "Dog", 0.5206573009490967],
    ["Person", "Dog", 0.3756750822067261],
    ["Animal", "Dog", 0.6618534326553345],
    ["Cat", "Cat", 1.0],
    ["Wolf", "Cat", 0.5081626176834106],
    ["Person", "Cat", 0.32475101947784424],
    ["Animal", "Cat", 0.6260400414466858],
    ["Wolf", "Wolf", 1.0],
    ["Person", "Wolf", 0.23091702163219452],
    ["Animal", "Wolf", 0.5261368751525879],
    ["Person", "Person", 1.0],
    ["Animal", "Person", 0.34220656752586365],
    ["Animal", "Animal", 1.0],
]

# Stretch the scores apart: each score s becomes 1000 ** s, so a score of
# 1.0 maps to 1000.
corr_data = [
    [word_a, word_b, 1000 ** score] for word_a, word_b, score in corr_data
]
# Keys are "<word>_<partner>" strings for every pair build_graph has expanded.
existing_edges = {}


def build_graph(w, lev):
    """Recursively add to ``G`` the weighted edges reachable from *w*.

    Every row of ``corr_data`` that has *w* in its first or second column
    contributes an edge between *w* and the word in the other column,
    weighted by the row's third column; the function then recurses on that
    other word. Each (word, partner) pair is recorded in ``existing_edges``
    so it is expanded at most once.

    Args:
        w: The word whose rows are expanded.
        lev: Current recursion depth; calls with ``lev > 5`` return
            immediately.
    """
    if lev > 5:
        return
    for z in corr_data:
        # Pick the partner from the matched row itself, whichever column
        # *w* was found in.
        if z[0] == w:
            other = z[1]
        elif z[1] == w:
            other = z[0]
        else:
            continue
        edge_key = str(w) + "_" + str(other)
        if edge_key in existing_edges:
            continue
        existing_edges[edge_key] = 1
        G.add_node(str(w))
        # Carry the row's score as the edge weight, as build_graph_for_all
        # does, so a weight-aware layout treats both builders alike.
        G.add_edge(str(w), str(other), weight=z[2])
        build_graph(other, lev + 1)
# Names of the nodes build_graph_for_all has already added to G.
existing_nodes = {}


def build_graph_for_all(max_rows=41):
    """Add one node per word and one weighted edge per ``corr_data`` row.

    Each row's third column is stored on the edge as its ``weight``
    attribute.

    Args:
        max_rows: Number of leading rows of ``corr_data`` to process. The
            default of 41 keeps the cap this function has always applied;
            pass ``None`` to process every row.
    """
    for d in corr_data[:max_rows]:
        first, second = str(d[0]), str(d[1])
        for name in (first, second):
            # Track nodes in existing_nodes; existing_edges holds
            # "<word>_<partner>" keys and never matches a bare word.
            if name not in existing_nodes:
                existing_nodes[name] = 1
                G.add_node(name)
        G.add_edge(first, second, weight=d[2])
# Populate G from every row of corr_data.
build_graph_for_all()

# Compute node positions from the "weight" edge attribute, draw the graph
# with labels, and write the figure to disk.
pos = nx.spring_layout(G, weight="weight")
nx.draw(G, pos, with_labels=True, width=2)
plt.savefig("path1.png")
来源:https://stackoverflow.com/questions/56717750/plotting-similarity-matrix-using-networkx