Can I export a tensorflow summary to CSV? - csv

Is there a way to extract scalar summaries to CSV (preferably from within tensorboard) from tfevents files?
Example code
The following code generates tfevent files in a summary_dir within the same directory. Suppose you let it run and you find something interesting. You want to get the raw data for further investigation. How would you do that?
#!/usr/bin/env python
"""A very simple MNIST classifier."""
import argparse
import sys
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
ce_with_logits = tf.nn.softmax_cross_entropy_with_logits
FLAGS = None
def inference(x):
"""
Build the inference graph.
Parameters
----------
x : placeholder
Returns
-------
Output tensor with the computed logits.
"""
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.matmul(x, W) + b
return y
def loss(logits, labels):
"""
Calculate the loss from the logits and the labels.
Parameters
----------
logits : Logits tensor, float - [batch_size, NUM_CLASSES].
labels : Labels tensor, int32 - [batch_size]
"""
cross_entropy = tf.reduce_mean(ce_with_logits(labels=labels,
logits=logits))
return cross_entropy
def training(loss, learning_rate=0.5):
"""
Set up the training Ops.
Parameters
----------
loss : Loss tensor, from loss().
learning_rate : The learning rate to use for gradient descent.
Returns
-------
train_op: The Op for training.
"""
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_step = optimizer.minimize(loss)
return train_step
def main(_):
# Import data
mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
# Create the model
x = tf.placeholder(tf.float32, [None, 784])
y = inference(x)
# Define loss and optimizer
y_ = tf.placeholder(tf.float32, [None, 10])
loss_ = loss(logits=y, labels=y_)
train_step = training(loss_)
# Test trained model
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
with tf.name_scope('accuracy'):
tf.summary.scalar('accuracy', accuracy)
merged = tf.summary.merge_all()
sess = tf.InteractiveSession()
train_writer = tf.summary.FileWriter('summary_dir/train', sess.graph)
test_writer = tf.summary.FileWriter('summary_dir/test', sess.graph)
tf.global_variables_initializer().run()
for train_step_i in range(100000):
if train_step_i % 100 == 0:
summary, acc = sess.run([merged, accuracy],
feed_dict={x: mnist.test.images,
y_: mnist.test.labels})
test_writer.add_summary(summary, train_step_i)
summary, acc = sess.run([merged, accuracy],
feed_dict={x: mnist.train.images,
y_: mnist.train.labels})
train_writer.add_summary(summary, train_step_i)
batch_xs, batch_ys = mnist.train.next_batch(100)
sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
print(sess.run(accuracy, feed_dict={x: mnist.test.images,
y_: mnist.test.labels}))
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--data_dir',
type=str,
default='/tmp/tensorflow/mnist/input_data',
help='Directory for storing input data')
FLAGS, unparsed = parser.parse_known_args()
tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)

While the answer here is as requested within tensorboard it only allows to download a csv for a single run of a single tag.
If you have for example 10 tags and 20 runs (what is not at all much) you would need to do the above step 200 times (that alone will probably take you more than a hour).
If now you for some reason would like to actually do something with the data for all runs for a single tag you would need to write some weird CSV accumulation script or copy everything by hand (what will probably cost you more than a day).
Therefore I would like to add a solution that extracts a CSV file for every tag with all runs contained. Column headers are the run path names and row indices are the run step numbers.
import os
import numpy as np
import pandas as pd
from collections import defaultdict
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
def tabulate_events(dpath):
summary_iterators = [EventAccumulator(os.path.join(dpath, dname)).Reload() for dname in os.listdir(dpath)]
tags = summary_iterators[0].Tags()['scalars']
for it in summary_iterators:
assert it.Tags()['scalars'] == tags
out = defaultdict(list)
steps = []
for tag in tags:
steps = [e.step for e in summary_iterators[0].Scalars(tag)]
for events in zip(*[acc.Scalars(tag) for acc in summary_iterators]):
assert len(set(e.step for e in events)) == 1
out[tag].append([e.value for e in events])
return out, steps
def to_csv(dpath):
dirs = os.listdir(dpath)
d, steps = tabulate_events(dpath)
tags, values = zip(*d.items())
np_values = np.array(values)
for index, tag in enumerate(tags):
df = pd.DataFrame(np_values[index], index=steps, columns=dirs)
df.to_csv(get_file_path(dpath, tag))
def get_file_path(dpath, tag):
file_name = tag.replace("/", "_") + '.csv'
folder_path = os.path.join(dpath, 'csv')
if not os.path.exists(folder_path):
os.makedirs(folder_path)
return os.path.join(folder_path, file_name)
if __name__ == '__main__':
path = "path_to_your_summaries"
to_csv(path)
My solution builds upon: https://stackoverflow.com/a/48774926/2230045
EDIT:
I created a more sophisticated version and released it on GitHub: https://github.com/Spenhouet/tensorboard-aggregator
This version aggregates multiple tensorboard runs and is able to save the aggregates to a new tensorboard summary or as a .csv file.

Just check the "Data download links" option on the upper-left in TensorBoard, and then click on the "CSV" button that will appear under your scalar summary.

Here is my solution which bases on the previous solutions but can scale up.
import os
import numpy as np
import pandas as pd
from collections import defaultdict
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
def tabulate_events(dpath):
final_out = {}
for dname in os.listdir(dpath):
print(f"Converting run {dname}",end="")
ea = EventAccumulator(os.path.join(dpath, dname)).Reload()
tags = ea.Tags()['scalars']
out = {}
for tag in tags:
tag_values=[]
wall_time=[]
steps=[]
for event in ea.Scalars(tag):
tag_values.append(event.value)
wall_time.append(event.wall_time)
steps.append(event.step)
out[tag]=pd.DataFrame(data=dict(zip(steps,np.array([tag_values,wall_time]).transpose())), columns=steps,index=['value','wall_time'])
if len(tags)>0:
df= pd.concat(out.values(),keys=out.keys())
df.to_csv(f'{dname}.csv')
print("- Done")
else:
print('- Not scalers to write')
final_out[dname] = df
return final_out
if __name__ == '__main__':
path = "youre/path/here"
steps = tabulate_events(path)
pd.concat(steps.values(),keys=steps.keys()).to_csv('all_result.csv')

Very minimal example:
import pandas as pd
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
log_dir = "lightning_logs/version_1"
event_accumulator = EventAccumulator(log_dir)
event_accumulator.Reload()
events = event_accumulator.Scalars("train_loss")
x = [x.step for x in events]
y = [x.value for x in events]
df = pd.DataFrame({"step": x, "train_loss": y})
df.to_csv("train_loss.csv")
print(df)
step train_loss
0 0 700.491516
1 1 163.593246
2 2 146.365448
3 3 153.830215
...
Plotting loss vs epochs example:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
log_dir = "lightning_logs/version_1"
y_key = "val_loss"
event_accumulator = EventAccumulator(log_dir)
event_accumulator.Reload()
steps = {x.step for x in event_accumulator.Scalars("epoch")}
x = list(range(len(steps)))
y = [x.value for x in event_accumulator.Scalars(y_key) if x.step in steps]
df = pd.DataFrame({"epoch": x, y_key: y})
df.to_csv(f"{y_key}.csv")
fig, ax = plt.subplots()
sns.lineplot(data=df, x="epoch", y=y_key)
fig.savefig("plot.png", dpi=300)

Just to add to #Spen
in case you want to export the data when you have varying numbers of steps.
This will make one large csv file.
Might need to change around the keys for it to work for you.
import os
import numpy as np
import pandas as pd
from collections import defaultdict
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
import glob
import pandas as pd
listOutput = (glob.glob("*/"))
listDF = []
for tb_output_folder in listOutput:
print(tb_output_folder)
x = EventAccumulator(path=tb_output_folder)
x.Reload()
x.FirstEventTimestamp()
keys = ['loss', 'mean_absolute_error', 'val_loss', 'val_mean_absolute_error']
listValues = {}
steps = [e.step for e in x.Scalars(keys[0])]
wall_time = [e.wall_time for e in x.Scalars(keys[0])]
index = [e.index for e in x.Scalars(keys[0])]
count = [e.count for e in x.Scalars(keys[0])]
n_steps = len(steps)
listRun = [tb_output_folder] * n_steps
printOutDict = {}
data = np.zeros((n_steps, len(keys)))
for i in range(len(keys)):
data[:,i] = [e.value for e in x.Scalars(keys[i])]
printOutDict = {keys[0]: data[:,0], keys[1]: data[:,1],keys[2]: data[:,2],keys[3]: data[:,3]}
printOutDict['Name'] = listRun
DF = pd.DataFrame(data=printOutDict)
listDF.append(DF)
df = pd.concat(listDF)
df.to_csv('Output.csv')

Related

How to download a trained model as a pickle file using Streamlit Download Button?

How do I download a trained model as a pickle file using Streamlit Download Button?
You can use io.BytesIO to store the pickled data inside bytes in RAM. Then, give these bytes as data argument in the st.download_button function.
import io
import pickle
import streamlit as st
def create_model():
"""Create an sklearn model so that we will have
something interesting to pickle.
Example taken from here:
https://machinelearningmastery.com/save-load-machine-learning-models-python-scikit-learn/
"""
import pandas as pd
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
names = ["preg", "plas", "pres", "skin", "test", "mass", "pedi", "age", "class"]
dataframe = pd.read_csv(url, names=names)
array = dataframe.values
X = array[:,0:8]
Y = array[:,8]
test_size = 0.33
seed = 7
X_train, _, Y_train, _ = model_selection.train_test_split(X, Y, test_size=test_size, random_state=seed)
# Fit the model on training set
model = LogisticRegression()
model.fit(X_train, Y_train)
return model
def pickle_model(model):
"""Pickle the model inside bytes. In our case, it is the "same" as
storing a file, but in RAM.
"""
f = io.BytesIO()
pickle.dump(model, f)
return f
st.title("My .pkl downloader")
model = create_model()
data = pickle_model(model)
st.download_button("Download .pkl file", data=data, file_name="my-pickled-model.pkl")

Issues in creating pysimplegui or Tkinter graph GUI by reading csv file , cleaning it and plotting graph (histogram+PDF)

I want to create GUI which should automatically clean data in csv file once selected and plot superimposed PDF & histogram graph. I have uploaded basic python program which generates the required graph but I am unbale to convert it into interface. I guess, only "open file" & "plot" buttons would suffice the requirement. image- want to retrieve data from 'N'th column (13) only with skipping top 4 rows
I am basically from metallurgy background and trying my hands in this field.
Any help would be greatly appreciated
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
raw_data = pd.read_csv("D:/Project/Python/NDC/Outlier_ND/800016_DAT.csv",skiprows=4,header=None)
clean = pd.DataFrame(raw_data)
data1 = clean.iloc[:, [13]]
Q1 = data1.quantile(0.25)
Q3 = data1.quantile(0.75)
IQR = Q3 - Q1
data_IQR = data1[~((data1 < (Q1 - 1.5 * IQR)) |(data1 > (Q3 + 1.5 * IQR))).any(axis=1)]
data_IQR.shape
print(data1.shape)
print(data_IQR.shape)
headerList = ['Actual_MR']
data_IQR.to_csv(r'D:\Project\Python\NDC\Outlier_ND\800016_DAT_IQR.csv', header=headerList, index=False)
data = pd.read_csv("D:/Project/Python/NDC/Outlier_ND/800016_DAT_IQR.csv")
mean, sd = norm.fit(data)
plt.hist(data, bins=25, density=True, alpha=0.6, facecolor = '#2ab0ff', edgecolor='#169acf', linewidth=0.5)
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
p = norm.pdf(x, mean, sd)
plt.plot(x, p, 'red', linewidth=2)
title = " Graph \n mean: {:.2f} and SD: {:.2f}".format(mean, sd)
plt.title(title)
plt.xlabel('MR')
plt.ylabel('Pr')
plt.show()
Following code demo how PySimpleGUI to work with matplotlib, detail please find all remark in script.
import math, random
from pathlib import Path
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure
import PySimpleGUI as sg
# 1. Define the class as the interface between matplotlib and PySimpleGUI
class Canvas(FigureCanvasTkAgg):
"""
Create a canvas for matplotlib pyplot under tkinter/PySimpleGUI canvas
"""
def __init__(self, figure=None, master=None):
super().__init__(figure=figure, master=master)
self.canvas = self.get_tk_widget()
self.canvas.pack(side='top', fill='both', expand=1)
# 2. create PySimpleGUI window, a fixed-size Frame with Canvas which expand in both x and y.
font = ("Courier New", 11)
sg.theme("DarkBlue3")
sg.set_options(font=font)
layout = [
[sg.Input(expand_x=True, key='Path'),
sg.FileBrowse(file_types=(("ALL CSV Files", "*.csv"), ("ALL Files", "*.*"))),
sg.Button('Plot')],
[sg.Frame("", [[sg.Canvas(background_color='green', expand_x=True, expand_y=True, key='Canvas')]], size=(640, 480))],
[sg.Push(), sg.Button('Exit')]
]
window = sg.Window('Matplotlib', layout, finalize=True)
# 3. Create a matplotlib canvas under sg.Canvas or sg.Graph
fig = Figure(figsize=(5, 4), dpi=100)
ax = fig.add_subplot()
canvas = Canvas(fig, window['Canvas'].Widget)
# 4. initial for figure
ax.set_title(f"Sensor Data")
ax.set_xlabel("X axis")
ax.set_ylabel("Y axis")
ax.set_xlim(0, 1079)
ax.set_ylim(-1.1, 1.1)
ax.grid()
canvas.draw() # do Update to GUI canvas
# 5. PySimpleGUI event loop
while True:
event, values = window.read()
if event in (sg.WINDOW_CLOSED, 'Exit'):
break
elif event == 'Plot':
"""
path = values['Path']
if not Path(path).is_file():
continue
"""
# 6. Get data from path and plot from here
ax.cla() # Clear axes first if required
ax.set_title(f"Sensor Data")
ax.set_xlabel("X axis")
ax.set_ylabel("Y axis")
ax.grid()
theta = random.randint(0, 359)
x = [degree for degree in range(1080)]
y = [math.sin((degree+theta)/180*math.pi) for degree in range(1080)]
ax.plot(x, y)
canvas.draw() # do Update to GUI canvas
# 7. Close window to exit
window.close()

Dumping numpy arrays into neural network

I am trying to solve the titanic machine learning challenge from kaggle using a neural network. I removed most of the irrelevant data and converted the useful data into a 2D numpy array while the survival is converted into a 1D numpy array. For some reason it throws an error saying dimension 0 in both shape must be equal, I've been trying to solve it for quite a while and I hope that you guys can help out.
TensorFlowNumpy.py
import tensorflow as tf
def numpy2tensor(numpy):
sess = tf.Session()
with sess.as_default():
return tf.constant(numpy)
def tensor2numpy(tensor):
sess = tf.Session()
with sess.as_default():
return tensor.eval()
Dataset.py
import pandas
import numpy as np
dataset = pandas.read_csv('train.csv')
dataset2= dataset.drop(['PassengerId','Survived','Name','Ticket','Fare','Cabin','Embarked'],axis=1)
dataset3= dataset2.fillna(0)
survive = pandas.read_csv('train.csv')
survival = np.float32(survive.Survived)
dataset4 = np.float32(dataset3)
MainCode.py
import tensorflow as tf
import numpy
from dataset import dataset4,survival
from sklearn.model_selection import train_test_split
from TensorFlowNumpy import numpy2tensor
train_x,test_x,train_y,test_y = train_test_split(dataset4,survival,test_size
= 0.2)
tensor_train_x = numpy2tensor(train_x)
tensor_train_y = numpy2tensor(train_y)
tensor_test_x = numpy2tensor(test_x)
tensor_test_y = numpy2tensor(test_y)
n_nodes_hl1 = 10
n_nodes_hl2 = 10
n_classes = 2
x = tf.placeholder(tf.float32)
y = tf.placeholder(tf.float32)
def neural_network_model(data):
hidden_1_layer = {'weights':tf.Variable(tf.random_normal([5,
n_nodes_hl1])),
'biases':tf.Variable(tf.random_normal([n_nodes_hl1]))}
hidden_2_layer = {'weights':tf.Variable(tf.random_normal([n_nodes_hl1,
n_nodes_hl2])),
'biases':tf.Variable(tf.random_normal([n_nodes_hl2]))}
output_layer = {'weights':tf.Variable(tf.random_normal([n_nodes_hl2,
n_classes])),
'biases':tf.Variable(tf.random_normal([n_classes]))}
l1 = tf.add(tf.matmul(data,hidden_1_layer['weights']),
hidden_1_layer['biases'])
l1 = tf.nn.relu(l1)
l2 = tf.add(tf.matmul(l1,hidden_2_layer['weights']),
hidden_2_layer['biases'])
l2 = tf.nn.relu(l2)
output = tf.matmul(l2,output_layer['weights']) + output_layer['biases']
return output
def train_neural_network(x):
prediction = neural_network_model(x)
cost =
tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction,
labels=tensor_train_y))
optimizer1 = tf.train.GradientDescentOptimizer(0.001).minimize(cost)
hm_epochs = 100
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for epoch in range(hm_epochs):
epoch_loss = 0
_, c = sess.run([optimizer1, cost], feed_dict={x:tensor_train_x,
y:tensor_train_y})
epoch_loss += c
print('Epoch', epoch+1, 'completed out
of',hm_epochs,'loss:',epoch_loss)
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
print('Accuracy:',accuracy.eval({x:test_x, y:test_y}))
train_neural_network(tensor_train_x)
I have faced this error several times, problem is obviously in our code. I didn't look through your code thoroughly as i am leaving for the day, but i smell that your dependent variable/ output variable shape is [1,712] which should be [712,1] so some where in the code try to fix it. Basically what it meant is you are having one row with 712 columns but it should be 712 rows with 1 column(output). Please mark this as answer if it helps. Ping me tomorrow if problem still exists. I will take a look at it.

EM-Algorithm with Pykalman

I am trying to implement a simpel application of the Kalman Filter using Pykalman, but I am getting an error on the estimation step of the EM-Algorithm that comes with the Pykalman package.
It is a simple linear regression with time-varying coefficient, based on simulated data. The code below simulates the data and starts the kalman filter, but when I try to estimate the parameters based on the observations, using kf.em(Data), it returns the error: ValueError: object arrays are not supported.
Am I doing something wrong with pykalman?
Model and full code below. The error occurs on the last line of the code.
Model (small images)
Description of the problem
State-Space representation of the model
Full Code
import pandas as pd
import scipy as sp
import numpy as np
import matplotlib.pyplot as plt
import pylab as pl
from pykalman import KalmanFilter
# generates the data
Data = pd.DataFrame(columns=['NoiseAR','NoiseReg', 'x', 'beta', 'y'], index=range(1000))
Data['NoiseAR'] = np.random.normal(loc=0.0, scale=1.0, size=1000)
Data['NoiseReg'] = np.random.normal(loc=0.0, scale=1.0, size=1000)
for i in range(1000):
if i==0:
Data.loc[i, 'x'] = Data.loc[i, 'NoiseAR']
else:
Data.loc[i, 'x'] = 0.95*Data.loc[i-1, 'x'] + Data.loc[i, 'NoiseAR']
for i in range(1000):
Data.loc[i, 'beta'] = np.sin(np.radians(i))
Data['y'] = Data['x']*Data['beta'] + Data['NoiseReg']
# set up the kalman filter
F = [1.]
H = Data['x'].values.reshape(1000,1,1)
Q = [2.]
R = [2.]
init_state_mean = [0.]
init_state_cov = [2.]
kf = KalmanFilter(
transition_matrices=F,
observation_matrices=H,
transition_covariance=Q,
observation_covariance=R,
initial_state_mean=init_state_mean,
initial_state_covariance=init_state_cov,
em_vars=['transition_covariance', 'observation_covariance', 'initial_state_mean', 'initial_state_covariance']
)
# estimate the parameters from em_vars using the EM algorithm
kf = kf.em(Data['y'].values)
I figured it out! Data['y'].values is a numpy array with dtype=object. All I had to do is to change the type of the array to float using .astype(float). This has to be done with everything that goes into the kalman filter object of pykalman, so I also had to change the type of the H matrix.
Hope this helps somebody in the future!
Here is what the final working code looks like:
import pandas as pd
import scipy as sp
import numpy as np
import matplotlib.pyplot as plt
import pylab as pl
from pykalman import KalmanFilter
Data = pd.DataFrame(columns=['NoiseAR','NoiseReg', 'x', 'beta', 'y'], index=range(1000))
Data['NoiseAR'] = np.random.normal(loc=0.0, scale=1.0, size=1000)
Data['NoiseReg'] = np.random.normal(loc=0.0, scale=1.0, size=1000)
plt.plot(Data[['NoiseAR','NoiseReg']])
plt.show()
for i in range(1000):
if i == 0:
Data.loc[i, 'x'] = Data.loc[i, 'NoiseAR']
else:
Data.loc[i, 'x'] = 0.95 * Data.loc[i - 1, 'x'] + Data.loc[i, 'NoiseAR']
plt.plot(Data['x'])
plt.show()
for i in range(1000):
Data.loc[i, 'beta'] = np.sin(np.radians(i))
plt.plot(Data['beta'])
plt.show()
Data['y'] = Data['x']*Data['beta'] + Data['NoiseReg']
plt.plot(Data[['x', 'y']])
plt.show()
F = [1.]
H = Data['x'].values.reshape(1000,1,1).astype(float)
Q = [2.]
R = [2.]
init_state_mean = [0.]
init_state_cov = [2.]
kf = KalmanFilter(
transition_matrices=F,
observation_matrices=H,
transition_covariance=Q,
observation_covariance=R,
initial_state_mean=init_state_mean,
initial_state_covariance=init_state_cov,
em_vars=['transition_covariance', 'observation_covariance', 'initial_state_mean', 'initial_state_covariance']
)
kf = kf.em(Data['y'].values.astype(float))
filtered_state_estimates = kf.filter(Data['y'].values.astype(float))[0]
smoothed_state_estimates = kf.smooth(Data['y'].values.astype(float))[0]
pl.figure(figsize=(10, 6))
lines_true = pl.plot(Data['beta'].values, linestyle='-', color='b')
lines_filt = pl.plot(filtered_state_estimates, linestyle='--', color='g')
lines_smooth = pl.plot(smoothed_state_estimates, linestyle='-.', color='r')
pl.legend(
(lines_true[0], lines_filt[0], lines_smooth[0]),
('true', 'filtered', 'smoothed')
)
pl.xlabel('time')
pl.ylabel('state')
pl.show()

NetworkX and Wordnet

Managed to create a hypernym graph but keep on getting bound method Synset.name of Synset('dog.n') instead of dog.n in the graph. Where is the mistake?
from nltk.corpus import wordnet as wn
import networkx as nx
import matplotlib.pyplot as plt
def closure_graph(synset, fn):
seen = set()
graph = nx.DiGraph()
def recurse(s):
if not s in seen:
seen.add(s)
graph.add_node(s.name)
for s1 in fn(s):
graph.add_node(s1.name)
graph.add_edge(s.name, s1.name)
recurse(s1)
recurse(synset)
return graph
dog = wn.synsets('dog')[0]
G = closure_graph(dog,
lambda s: s.hypernyms())
index = nx.betweenness_centrality(G)
plt.rc('figure', figsize=(12, 7))
node_size = [index[n]*1000 for n in G]
pos = nx.spring_layout(G)
nx.draw_networkx(G, pos, node_size=node_size, edge_color='r', alpha=.3, linewidths=0)
plt.show()
Edit 1:
Managed to create a hypernym graph but keep on getting bound method Synset.name of Synset('dog.n') instead of dog.n in the graph. Where is the mistake?
So what is printing out is that networkx has created a bunch of nodes, each node being a function. So we need to look at the command that is adding nodes. This occurs in closure_graph.
In the definition of closure_graph, we see that s.name is added as a node. This is a function (it probably used to actually be the name before nltk was updated). Instead you want to add s.name() which is now a function that returns the name. There are 4 places where this occurs.
from nltk.corpus import wordnet as wn
import networkx as nx
import matplotlib.pyplot as plt
def closure_graph(synset, fn):
seen = set()
graph = nx.DiGraph()
def recurse(s):
if not s in seen:
seen.add(s)
graph.add_node(s.name())
for s1 in fn(s):
graph.add_node(s1.name())
graph.add_edge(s.name(), s1.name())
recurse(s1)
recurse(synset)
return graph
dog = wn.synsets('dog')[0]
G = closure_graph(dog,
lambda s: s.hypernyms())
index = nx.betweenness_centrality(G)
plt.rc('figure', figsize=(12, 7))
node_size = [index[n]*1000 for n in G]
pos = nx.spring_layout(G)
nx.draw_networkx(G, pos, node_size=node_size, edge_color='r', alpha=.3, linewidths=0)
plt.show()