The Tensorflow Object_detection API 's visualize don't work - tensorflow-slim

when I am using the API of Object_detection,I followed the instruction ,everything is fine .However ,when I begin to test my picture , I met a problem , it seems that the function named
" visualize_boxes_and_labels_on_image_array " ( in the 57 line ) didn't work . Here is my source codes
import cv2
import numpy as np
import tensorflow as tf
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
class TOD(object):
def __init__(self):
self.PATH_TO_CKPT = '/home/xiyou/Desktop/ssd_training/result/frozen_inference_graph.pb'
self.PATH_TO_LABELS = '/home/xiyou/Desktop/ssd_training/detection_for_smoke.pbtxt'
self.NUM_CLASSES = 1
self.detection_graph = self._load_model()
self.category_index = self._load_label_map()
def _load_model(self):
detection_graph = tf.Graph()
with detection_graph.as_default():
od_graph_def = tf.GraphDef()
with tf.gfile.GFile(self.PATH_TO_CKPT, 'rb') as fid:
serialized_graph = fid.read()
od_graph_def.ParseFromString(serialized_graph)
tf.import_graph_def(od_graph_def, name='')
return detection_graph
def _load_label_map(self):
label_map = label_map_util.load_labelmap(self.PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map,
max_num_classes=self.NUM_CLASSES,
use_display_name=True)
category_index = label_map_util.create_category_index(categories)
return category_index
def detect(self, image):
with self.detection_graph.as_default():
with tf.Session(graph=self.detection_graph) as sess:
# Expand dimensions since the model expects images to have shape: [1, None, None, 3]
image_np_expanded = np.expand_dims(image, axis=0)
image_tensor = self.detection_graph.get_tensor_by_name('image_tensor:0')
boxes = self.detection_graph.get_tensor_by_name('detection_boxes:0')
scores = self.detection_graph.get_tensor_by_name('detection_scores:0')
classes = self.detection_graph.get_tensor_by_name('detection_classes:0')
num_detections = self.detection_graph.get_tensor_by_name('num_detections:0')
# Actual detection.
(boxes, scores, classes, num_detections) = sess.run(
[boxes, scores, classes, num_detections],
feed_dict={image_tensor: image_np_expanded})
print(boxes, scores, classes, num_detections)
#print(np.squeeze(boxes))
# Visualization of the results of a detection.
#######Here is the problem
# image1 = vis_util.visualize_boxes_and_labels_on_image_array(
image, #######Here is the problem
np.squeeze(boxes),
np.squeeze(classes).astype(np.int32),
np.squeeze(scores),
self.category_index,
use_normalized_coordinates=True,
line_thickness=50,
)
#print(np.squeeze(boxes),np.squeeze(classes))
cv2.namedWindow("detection")
cv2.imshow("detection", image1)
cv2.waitKey(0)
if __name__ == '__main__':
image = cv2.imread('/home/xiyou/Pictures/timg1.jpg')
detecotr = TOD()
detecotr.detect(image)
when I run this code , the image did show ,but nothing changed , no detected area in the pic and no an other informations . the input pic is the same as the out image . But when I was Debug , I found the Varibles such as soucres , classes , boxes do have values.
Is anyone can help me ? Thanks!!!
And my Tensorflow version is 1.4.0 , CUDA 8.0 in Ubuntu 16.04

Related

Problem with PettingZoo and Stable-Baselines3 with a ParallelEnv

I am having trouble in making things work with a Custom ParallelEnv I wrote by using PettingZoo. I am using SuperSuit's ss.pettingzoo_env_to_vec_env_v1(env) as a wrapper to Vectorize the environment and make it work with Stable-Baseline3 and documented here.
You can find attached a summary of the most relevant part of the code:
from typing import Optional
from gym import spaces
import random
import numpy as np
from pettingzoo import ParallelEnv
from pettingzoo.utils.conversions import parallel_wrapper_fn
import supersuit as ss
from gym.utils import EzPickle, seeding
def env(**kwargs):
env_ = parallel_env(**kwargs)
env_ = ss.pettingzoo_env_to_vec_env_v1(env_)
#env_ = ss.concat_vec_envs_v1(env_, 1)
return env_
petting_zoo = env
class parallel_env(ParallelEnv, EzPickle):
metadata = {'render_modes': ['ansi'], "name": "PlayerEnv-Multi-v0"}
def __init__(self, n_agents: int = 20, new_step_api: bool = True) -> None:
EzPickle.__init__(
self,
n_agents,
new_step_api
)
self._episode_ended = False
self.n_agents = n_agents
self.possible_agents = [
f"player_{idx}" for idx in range(n_agents)]
self.agents = self.possible_agents[:]
self.agent_name_mapping = dict(
zip(self.possible_agents, list(range(len(self.possible_agents))))
)
self.observation_spaces = spaces.Dict(
{agent: spaces.Box(shape=(len(self.agents),),
dtype=np.float64, low=0.0, high=1.0) for agent in self.possible_agents}
)
self.action_spaces = spaces.Dict(
{agent: spaces.Discrete(4) for agent in self.possible_agents}
)
self.current_step = 0
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
def observation_space(self, agent):
return self.observation_spaces[agent]
def action_space(self, agent):
return self.action_spaces[agent]
def __calculate_observation(self, agent_id: int) -> np.ndarray:
return self.observation_space(agent_id).sample()
def __calculate_observations(self) -> np.ndarray:
observations = {
agent: self.__calculate_observation(
agent_id=agent)
for agent in self.agents
}
return observations
def observe(self, agent):
return self.__calculate_observation(agent_id=agent)
def step(self, actions):
if self._episode_ended:
return self.reset()
observations = self.__calculate_observations()
rewards = random.sample(range(100), self.n_agents)
self.current_step += 1
self._episode_ended = self.current_step >= 100
infos = {agent: {} for agent in self.agents}
dones = {agent: self._episode_ended for agent in self.agents}
rewards = {
self.agents[i]: rewards[i]
for i in range(len(self.agents))
}
if self._episode_ended:
self.agents = {} # To satisfy `set(par_env.agents) == live_agents`
return observations, rewards, dones, infos
def reset(self,
seed: Optional[int] = None,
return_info: bool = False,
options: Optional[dict] = None,):
self.agents = self.possible_agents[:]
self._episode_ended = False
self.current_step = 0
observations = self.__calculate_observations()
return observations
def render(self, mode="human"):
# TODO: IMPLEMENT
print("TO BE IMPLEMENTED")
def close(self):
pass
Unfortunately when I try to test with the following main procedure:
from stable_baselines3 import DQN, PPO
from stable_baselines3.common.env_checker import check_env
from dummy_env import dummy
from pettingzoo.test import parallel_api_test
if __name__ == '__main__':
# Testing the parallel algorithm alone
env_parallel = dummy.parallel_env()
parallel_api_test(env_parallel) # This works!
# Testing the environment with the wrapper
env = dummy.petting_zoo()
# ERROR: AssertionError: The observation returned by the `reset()` method does not match the given observation space
check_env(env)
# Model initialization
model = PPO("MlpPolicy", env, verbose=1)
# ERROR: ValueError: could not broadcast input array from shape (20,20) into shape (20,)
model.learn(total_timesteps=10_000)
I get the following error:
AssertionError: The observation returned by the `reset()` method does not match the given observation space
If I skip check_env() I get the following one:
ValueError: could not broadcast input array from shape (20,20) into shape (20,)
It seems like that ss.pettingzoo_env_to_vec_env_v1(env) is capable of splitting the parallel environment in multiple vectorized ones, but not for the reset() function.
Does anyone know how to fix this problem?
Plese find the Github Repository to reproduce the problem.
You should double check the reset() function in PettingZoo. It will return None instead of an observation like GYM
Thanks to discussion I had in the issue section of the SuperSuit repository, I am able to post the solution to the problem. Thanks to jjshoots!
First of all it is necessary to have the latest SuperSuit version. In order to get that I needed to install Stable-Baseline3 using the instructions here to make it work with gym 0.24+.
After that, taking the code in the question as example, it is necessary to substitute
def env(**kwargs):
env_ = parallel_env(**kwargs)
env_ = ss.pettingzoo_env_to_vec_env_v1(env_)
#env_ = ss.concat_vec_envs_v1(env_, 1)
return env_
with
def env(**kwargs):
env_ = parallel_env(**kwargs)
env_ = ss.pettingzoo_env_to_vec_env_v1(env_)
env_ = ss.concat_vec_envs_v1(env_, 1, base_class="stable_baselines3")
return env_
The outcomes are:
Outcome 1: leaving the line with check_env(env) I got an error AssertionError: Your environment must inherit from the gym.Env class cf https://github.com/openai/gym/blob/master/gym/core.py
Outcome 2: removing the line with check_env(env), the agent starts training successfully!
In the end, I think that the argument base_class="stable_baselines3" made the difference.
Only the small problem on check_env remains to be reported, but I think it can be considered as trivial if the training works.

Issues in creating pysimplegui or Tkinter graph GUI by reading csv file , cleaning it and plotting graph (histogram+PDF)

I want to create GUI which should automatically clean data in csv file once selected and plot superimposed PDF & histogram graph. I have uploaded basic python program which generates the required graph but I am unbale to convert it into interface. I guess, only "open file" & "plot" buttons would suffice the requirement. image- want to retrieve data from 'N'th column (13) only with skipping top 4 rows
I am basically from metallurgy background and trying my hands in this field.
Any help would be greatly appreciated
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
raw_data = pd.read_csv("D:/Project/Python/NDC/Outlier_ND/800016_DAT.csv",skiprows=4,header=None)
clean = pd.DataFrame(raw_data)
data1 = clean.iloc[:, [13]]
Q1 = data1.quantile(0.25)
Q3 = data1.quantile(0.75)
IQR = Q3 - Q1
data_IQR = data1[~((data1 < (Q1 - 1.5 * IQR)) |(data1 > (Q3 + 1.5 * IQR))).any(axis=1)]
data_IQR.shape
print(data1.shape)
print(data_IQR.shape)
headerList = ['Actual_MR']
data_IQR.to_csv(r'D:\Project\Python\NDC\Outlier_ND\800016_DAT_IQR.csv', header=headerList, index=False)
data = pd.read_csv("D:/Project/Python/NDC/Outlier_ND/800016_DAT_IQR.csv")
mean, sd = norm.fit(data)
plt.hist(data, bins=25, density=True, alpha=0.6, facecolor = '#2ab0ff', edgecolor='#169acf', linewidth=0.5)
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
p = norm.pdf(x, mean, sd)
plt.plot(x, p, 'red', linewidth=2)
title = " Graph \n mean: {:.2f} and SD: {:.2f}".format(mean, sd)
plt.title(title)
plt.xlabel('MR')
plt.ylabel('Pr')
plt.show()
Following code demo how PySimpleGUI to work with matplotlib, detail please find all remark in script.
import math, random
from pathlib import Path
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure
import PySimpleGUI as sg
# 1. Define the class as the interface between matplotlib and PySimpleGUI
class Canvas(FigureCanvasTkAgg):
"""
Create a canvas for matplotlib pyplot under tkinter/PySimpleGUI canvas
"""
def __init__(self, figure=None, master=None):
super().__init__(figure=figure, master=master)
self.canvas = self.get_tk_widget()
self.canvas.pack(side='top', fill='both', expand=1)
# 2. create PySimpleGUI window, a fixed-size Frame with Canvas which expand in both x and y.
font = ("Courier New", 11)
sg.theme("DarkBlue3")
sg.set_options(font=font)
layout = [
[sg.Input(expand_x=True, key='Path'),
sg.FileBrowse(file_types=(("ALL CSV Files", "*.csv"), ("ALL Files", "*.*"))),
sg.Button('Plot')],
[sg.Frame("", [[sg.Canvas(background_color='green', expand_x=True, expand_y=True, key='Canvas')]], size=(640, 480))],
[sg.Push(), sg.Button('Exit')]
]
window = sg.Window('Matplotlib', layout, finalize=True)
# 3. Create a matplotlib canvas under sg.Canvas or sg.Graph
fig = Figure(figsize=(5, 4), dpi=100)
ax = fig.add_subplot()
canvas = Canvas(fig, window['Canvas'].Widget)
# 4. initial for figure
ax.set_title(f"Sensor Data")
ax.set_xlabel("X axis")
ax.set_ylabel("Y axis")
ax.set_xlim(0, 1079)
ax.set_ylim(-1.1, 1.1)
ax.grid()
canvas.draw() # do Update to GUI canvas
# 5. PySimpleGUI event loop
while True:
event, values = window.read()
if event in (sg.WINDOW_CLOSED, 'Exit'):
break
elif event == 'Plot':
"""
path = values['Path']
if not Path(path).is_file():
continue
"""
# 6. Get data from path and plot from here
ax.cla() # Clear axes first if required
ax.set_title(f"Sensor Data")
ax.set_xlabel("X axis")
ax.set_ylabel("Y axis")
ax.grid()
theta = random.randint(0, 359)
x = [degree for degree in range(1080)]
y = [math.sin((degree+theta)/180*math.pi) for degree in range(1080)]
ax.plot(x, y)
canvas.draw() # do Update to GUI canvas
# 7. Close window to exit
window.close()

Keras Functional API and loss function with multiple inputs

I am trying to use a custom Keras loss function that apart from the usual signature (y_true, y_pred) takes another parameter sigma (which is also produced by the last layer of the network).
The training works fine, but then I am not sure how to perform forward propagation and return sigma (while muis the output of the model.predict method).
This is the code I am using, which features a custom layer GaussianLayer that returns the list [mu, sigma].
import tensorflow as tf
from keras import backend as K
from keras.layers import Input, Dense, Layer, Dropout
from keras.models import Model
from keras.initializers import glorot_normal
import numpy as np
def custom_loss(sigma):
def gaussian_loss(y_true, y_pred):
return tf.reduce_mean(0.5*tf.log(sigma) + 0.5*tf.div(tf.square(y_true - y_pred), sigma)) + 10
return gaussian_loss
class GaussianLayer(Layer):
def __init__(self, output_dim, **kwargs):
self.output_dim = output_dim
super(GaussianLayer, self).__init__(**kwargs)
def build(self, input_shape):
self.kernel_1 = self.add_weight(name='kernel_1',
shape=(30, self.output_dim),
initializer=glorot_normal(),
trainable=True)
self.kernel_2 = self.add_weight(name='kernel_2',
shape=(30, self.output_dim),
initializer=glorot_normal(),
trainable=True)
self.bias_1 = self.add_weight(name='bias_1',
shape=(self.output_dim, ),
initializer=glorot_normal(),
trainable=True)
self.bias_2 = self.add_weight(name='bias_2',
shape=(self.output_dim, ),
initializer=glorot_normal(),
trainable=True)
super(GaussianLayer, self).build(input_shape)
def call(self, x):
output_mu = K.dot(x, self.kernel_1) + self.bias_1
output_sig = K.dot(x, self.kernel_2) + self.bias_2
output_sig_pos = K.log(1 + K.exp(output_sig)) + 1e-06
return [output_mu, output_sig_pos]
def compute_output_shape(self, input_shape):
return [(input_shape[0], self.output_dim), (input_shape[0], self.output_dim)]
# This returns a tensor
inputs = Input(shape=(1,))
x = Dense(30, activation='relu')(inputs)
x = Dropout(0.3)(x)
x = Dense(30, activation='relu')(x)
x = Dense(40, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(30, activation='relu')(x)
mu, sigma = GaussianLayer(1)(x)
model = Model(inputs, mu)
model.compile(loss=custom_loss(sigma), optimizer='adam')
model.fit(train_x, train_y, epochs=150)
Since your model returns two tensors as output, you also need to pass a list of two arrays as the output when calling fit() method. That's essentially what the error is trying to convey:
Error when checking model target:
So the error is in targets (i.e. labels). What is wrong?
the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 2 array(s), but instead got the following list of 1 arrays:
I may have found the answer among Keras FAQs.
I found out that it is possible to retrieve intermediate steps' output using the code snippet below:
layer_name = 'main_output'
intermediate_layer_model = Model(inputs=model.input,
outputs=model.get_layer(layer_name).output)
intermediate_output = intermediate_layer_model.predict(train_x[0])
intermediate_output
In this case intermediate_output is a list of two values [mu, sigma] (just needed to name the output layer main_output and retrieve it later)

Dumping numpy arrays into neural network

I am trying to solve the titanic machine learning challenge from kaggle using a neural network. I removed most of the irrelevant data and converted the useful data into a 2D numpy array while the survival is converted into a 1D numpy array. For some reason it throws an error saying dimension 0 in both shape must be equal, I've been trying to solve it for quite a while and I hope that you guys can help out.
TensorFlowNumpy.py
import tensorflow as tf
def numpy2tensor(numpy):
sess = tf.Session()
with sess.as_default():
return tf.constant(numpy)
def tensor2numpy(tensor):
sess = tf.Session()
with sess.as_default():
return tensor.eval()
Dataset.py
import pandas
import numpy as np
dataset = pandas.read_csv('train.csv')
dataset2= dataset.drop(['PassengerId','Survived','Name','Ticket','Fare','Cabin','Embarked'],axis=1)
dataset3= dataset2.fillna(0)
survive = pandas.read_csv('train.csv')
survival = np.float32(survive.Survived)
dataset4 = np.float32(dataset3)
MainCode.py
import tensorflow as tf
import numpy
from dataset import dataset4,survival
from sklearn.model_selection import train_test_split
from TensorFlowNumpy import numpy2tensor
train_x,test_x,train_y,test_y = train_test_split(dataset4,survival,test_size
= 0.2)
tensor_train_x = numpy2tensor(train_x)
tensor_train_y = numpy2tensor(train_y)
tensor_test_x = numpy2tensor(test_x)
tensor_test_y = numpy2tensor(test_y)
n_nodes_hl1 = 10
n_nodes_hl2 = 10
n_classes = 2
x = tf.placeholder(tf.float32)
y = tf.placeholder(tf.float32)
def neural_network_model(data):
hidden_1_layer = {'weights':tf.Variable(tf.random_normal([5,
n_nodes_hl1])),
'biases':tf.Variable(tf.random_normal([n_nodes_hl1]))}
hidden_2_layer = {'weights':tf.Variable(tf.random_normal([n_nodes_hl1,
n_nodes_hl2])),
'biases':tf.Variable(tf.random_normal([n_nodes_hl2]))}
output_layer = {'weights':tf.Variable(tf.random_normal([n_nodes_hl2,
n_classes])),
'biases':tf.Variable(tf.random_normal([n_classes]))}
l1 = tf.add(tf.matmul(data,hidden_1_layer['weights']),
hidden_1_layer['biases'])
l1 = tf.nn.relu(l1)
l2 = tf.add(tf.matmul(l1,hidden_2_layer['weights']),
hidden_2_layer['biases'])
l2 = tf.nn.relu(l2)
output = tf.matmul(l2,output_layer['weights']) + output_layer['biases']
return output
def train_neural_network(x):
prediction = neural_network_model(x)
cost =
tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction,
labels=tensor_train_y))
optimizer1 = tf.train.GradientDescentOptimizer(0.001).minimize(cost)
hm_epochs = 100
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for epoch in range(hm_epochs):
epoch_loss = 0
_, c = sess.run([optimizer1, cost], feed_dict={x:tensor_train_x,
y:tensor_train_y})
epoch_loss += c
print('Epoch', epoch+1, 'completed out
of',hm_epochs,'loss:',epoch_loss)
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
print('Accuracy:',accuracy.eval({x:test_x, y:test_y}))
train_neural_network(tensor_train_x)
I have faced this error several times, problem is obviously in our code. I didn't look through your code thoroughly as i am leaving for the day, but i smell that your dependent variable/ output variable shape is [1,712] which should be [712,1] so some where in the code try to fix it. Basically what it meant is you are having one row with 712 columns but it should be 712 rows with 1 column(output). Please mark this as answer if it helps. Ping me tomorrow if problem still exists. I will take a look at it.

Can I export a tensorflow summary to CSV?

Is there a way to extract scalar summaries to CSV (preferably from within tensorboard) from tfevents files?
Example code
The following code generates tfevent files in a summary_dir within the same directory. Suppose you let it run and you find something interesting. You want to get the raw data for further investigation. How would you do that?
#!/usr/bin/env python
"""A very simple MNIST classifier."""
import argparse
import sys
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
ce_with_logits = tf.nn.softmax_cross_entropy_with_logits
FLAGS = None
def inference(x):
"""
Build the inference graph.
Parameters
----------
x : placeholder
Returns
-------
Output tensor with the computed logits.
"""
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.matmul(x, W) + b
return y
def loss(logits, labels):
"""
Calculate the loss from the logits and the labels.
Parameters
----------
logits : Logits tensor, float - [batch_size, NUM_CLASSES].
labels : Labels tensor, int32 - [batch_size]
"""
cross_entropy = tf.reduce_mean(ce_with_logits(labels=labels,
logits=logits))
return cross_entropy
def training(loss, learning_rate=0.5):
"""
Set up the training Ops.
Parameters
----------
loss : Loss tensor, from loss().
learning_rate : The learning rate to use for gradient descent.
Returns
-------
train_op: The Op for training.
"""
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_step = optimizer.minimize(loss)
return train_step
def main(_):
# Import data
mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
# Create the model
x = tf.placeholder(tf.float32, [None, 784])
y = inference(x)
# Define loss and optimizer
y_ = tf.placeholder(tf.float32, [None, 10])
loss_ = loss(logits=y, labels=y_)
train_step = training(loss_)
# Test trained model
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
with tf.name_scope('accuracy'):
tf.summary.scalar('accuracy', accuracy)
merged = tf.summary.merge_all()
sess = tf.InteractiveSession()
train_writer = tf.summary.FileWriter('summary_dir/train', sess.graph)
test_writer = tf.summary.FileWriter('summary_dir/test', sess.graph)
tf.global_variables_initializer().run()
for train_step_i in range(100000):
if train_step_i % 100 == 0:
summary, acc = sess.run([merged, accuracy],
feed_dict={x: mnist.test.images,
y_: mnist.test.labels})
test_writer.add_summary(summary, train_step_i)
summary, acc = sess.run([merged, accuracy],
feed_dict={x: mnist.train.images,
y_: mnist.train.labels})
train_writer.add_summary(summary, train_step_i)
batch_xs, batch_ys = mnist.train.next_batch(100)
sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
print(sess.run(accuracy, feed_dict={x: mnist.test.images,
y_: mnist.test.labels}))
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--data_dir',
type=str,
default='/tmp/tensorflow/mnist/input_data',
help='Directory for storing input data')
FLAGS, unparsed = parser.parse_known_args()
tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
While the answer here is as requested within tensorboard it only allows to download a csv for a single run of a single tag.
If you have for example 10 tags and 20 runs (what is not at all much) you would need to do the above step 200 times (that alone will probably take you more than a hour).
If now you for some reason would like to actually do something with the data for all runs for a single tag you would need to write some weird CSV accumulation script or copy everything by hand (what will probably cost you more than a day).
Therefore I would like to add a solution that extracts a CSV file for every tag with all runs contained. Column headers are the run path names and row indices are the run step numbers.
import os
import numpy as np
import pandas as pd
from collections import defaultdict
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
def tabulate_events(dpath):
summary_iterators = [EventAccumulator(os.path.join(dpath, dname)).Reload() for dname in os.listdir(dpath)]
tags = summary_iterators[0].Tags()['scalars']
for it in summary_iterators:
assert it.Tags()['scalars'] == tags
out = defaultdict(list)
steps = []
for tag in tags:
steps = [e.step for e in summary_iterators[0].Scalars(tag)]
for events in zip(*[acc.Scalars(tag) for acc in summary_iterators]):
assert len(set(e.step for e in events)) == 1
out[tag].append([e.value for e in events])
return out, steps
def to_csv(dpath):
dirs = os.listdir(dpath)
d, steps = tabulate_events(dpath)
tags, values = zip(*d.items())
np_values = np.array(values)
for index, tag in enumerate(tags):
df = pd.DataFrame(np_values[index], index=steps, columns=dirs)
df.to_csv(get_file_path(dpath, tag))
def get_file_path(dpath, tag):
file_name = tag.replace("/", "_") + '.csv'
folder_path = os.path.join(dpath, 'csv')
if not os.path.exists(folder_path):
os.makedirs(folder_path)
return os.path.join(folder_path, file_name)
if __name__ == '__main__':
path = "path_to_your_summaries"
to_csv(path)
My solution builds upon: https://stackoverflow.com/a/48774926/2230045
EDIT:
I created a more sophisticated version and released it on GitHub: https://github.com/Spenhouet/tensorboard-aggregator
This version aggregates multiple tensorboard runs and is able to save the aggregates to a new tensorboard summary or as a .csv file.
Just check the "Data download links" option on the upper-left in TensorBoard, and then click on the "CSV" button that will appear under your scalar summary.
Here is my solution which bases on the previous solutions but can scale up.
import os
import numpy as np
import pandas as pd
from collections import defaultdict
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
def tabulate_events(dpath):
final_out = {}
for dname in os.listdir(dpath):
print(f"Converting run {dname}",end="")
ea = EventAccumulator(os.path.join(dpath, dname)).Reload()
tags = ea.Tags()['scalars']
out = {}
for tag in tags:
tag_values=[]
wall_time=[]
steps=[]
for event in ea.Scalars(tag):
tag_values.append(event.value)
wall_time.append(event.wall_time)
steps.append(event.step)
out[tag]=pd.DataFrame(data=dict(zip(steps,np.array([tag_values,wall_time]).transpose())), columns=steps,index=['value','wall_time'])
if len(tags)>0:
df= pd.concat(out.values(),keys=out.keys())
df.to_csv(f'{dname}.csv')
print("- Done")
else:
print('- Not scalers to write')
final_out[dname] = df
return final_out
if __name__ == '__main__':
path = "youre/path/here"
steps = tabulate_events(path)
pd.concat(steps.values(),keys=steps.keys()).to_csv('all_result.csv')
Very minimal example:
import pandas as pd
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
log_dir = "lightning_logs/version_1"
event_accumulator = EventAccumulator(log_dir)
event_accumulator.Reload()
events = event_accumulator.Scalars("train_loss")
x = [x.step for x in events]
y = [x.value for x in events]
df = pd.DataFrame({"step": x, "train_loss": y})
df.to_csv("train_loss.csv")
print(df)
step train_loss
0 0 700.491516
1 1 163.593246
2 2 146.365448
3 3 153.830215
...
Plotting loss vs epochs example:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
log_dir = "lightning_logs/version_1"
y_key = "val_loss"
event_accumulator = EventAccumulator(log_dir)
event_accumulator.Reload()
steps = {x.step for x in event_accumulator.Scalars("epoch")}
x = list(range(len(steps)))
y = [x.value for x in event_accumulator.Scalars(y_key) if x.step in steps]
df = pd.DataFrame({"epoch": x, y_key: y})
df.to_csv(f"{y_key}.csv")
fig, ax = plt.subplots()
sns.lineplot(data=df, x="epoch", y=y_key)
fig.savefig("plot.png", dpi=300)
Just to add to #Spen
in case you want to export the data when you have varying numbers of steps.
This will make one large csv file.
Might need to change around the keys for it to work for you.
import os
import numpy as np
import pandas as pd
from collections import defaultdict
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
import glob
import pandas as pd
listOutput = (glob.glob("*/"))
listDF = []
for tb_output_folder in listOutput:
print(tb_output_folder)
x = EventAccumulator(path=tb_output_folder)
x.Reload()
x.FirstEventTimestamp()
keys = ['loss', 'mean_absolute_error', 'val_loss', 'val_mean_absolute_error']
listValues = {}
steps = [e.step for e in x.Scalars(keys[0])]
wall_time = [e.wall_time for e in x.Scalars(keys[0])]
index = [e.index for e in x.Scalars(keys[0])]
count = [e.count for e in x.Scalars(keys[0])]
n_steps = len(steps)
listRun = [tb_output_folder] * n_steps
printOutDict = {}
data = np.zeros((n_steps, len(keys)))
for i in range(len(keys)):
data[:,i] = [e.value for e in x.Scalars(keys[i])]
printOutDict = {keys[0]: data[:,0], keys[1]: data[:,1],keys[2]: data[:,2],keys[3]: data[:,3]}
printOutDict['Name'] = listRun
DF = pd.DataFrame(data=printOutDict)
listDF.append(DF)
df = pd.concat(listDF)
df.to_csv('Output.csv')