I'm plotting a function that has several discontinuities. The function is given. I want to connect points with lines only where the function is continuous.
Here is a simplified example of what the plot is doing.
import numpy as np
from math import*
import matplotlib.pyplot as plt
from scipy.special import jv, kn
a=sqrt(300-1)
x = np.linspace(0, 0.2, 500)
J0 = jv(0, a*x)
J1 = jv(1, a*x)
K0 = kn(0,x)
K1 = kn(1,x)
Y2=a*x*(J1/J0)
Y3=x*(K1/K0)
plt.xlabel('x')
plt.ylabel('y')
plt.ylim(-10,10)
plt.axhline(0, color='black')
plt.plot(x,Y2)
plt.plot(x,Y3)
plt.show()
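One approach that works here (a minimal sketch, not the only option): insert NaN wherever consecutive samples jump by more than some threshold. matplotlib does not draw line segments across NaN values, so the curves break at each discontinuity instead of being joined by near-vertical lines. The threshold of 10 below is an arbitrary choice tied to the ylim.
import numpy as np
from scipy.special import jv, kn
import matplotlib.pyplot as plt

a = np.sqrt(300 - 1)
x = np.linspace(0, 0.2, 500)
Y2 = a*x*(jv(1, a*x)/jv(0, a*x))
Y3 = x*(kn(1, x)/kn(0, x))

def break_at_jumps(y, threshold=10.0):
    # set samples to NaN where the curve jumps by more than `threshold`,
    # so matplotlib starts a new line segment there
    y = np.array(y, dtype=float)
    y[1:][np.abs(np.diff(y)) > threshold] = np.nan
    return y

plt.xlabel('x')
plt.ylabel('y')
plt.ylim(-10, 10)
plt.axhline(0, color='black')
plt.plot(x, break_at_jumps(Y2))
plt.plot(x, break_at_jumps(Y3))
plt.show()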
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy import stats
from numpy.linalg import inv
import seaborn as sns
url = r'C:\Users\pchan\kc_house_train_data.csv'
df = pd.read_csv(url,index_col=0)
features_1 = ['sqft_living', 'bedrooms', 'bathrooms', 'lat', 'long']
x=df.filter(features_1)
x = np.c_[np.ones((x.shape[0], 1)), x]
x=pd.DataFrame(x)
y=df.filter(['price'])
y=y.reset_index(drop=True)
x_new=x.T
y.rename(columns = {'price':0}, inplace = True)
w=pd.DataFrame([0]*(x_new.shape[0]))
cost=[]
i=0
a=0.00001
while(i<50):
    temp=x.T@(y-x@w)
    w=w+(a*temp)
    i+=1
print(w)
from sklearn.linear_model import LinearRegression
reg=LinearRegression().fit(x,y)
res=reg.coef_
print(res)
w_closed=np.linalg.inv(x.T@x) @ x.T @ y
print(w_closed)
The closed form solution and LinearRegression from sklearn were able to get the correct weights, but not the gradient descent approach (using matrix notation).
What's wrong with the gradient descent approach here?
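For what it's worth, here is a minimal sketch of batch gradient descent in matrix notation that does converge to the closed-form weights. It assumes the usual culprits: the raw features have wildly different scales and the update is missing the gradient scaling, so standardize the features and use an appropriate learning rate. The data below are a synthetic stand-in (hypothetical), since the kc_house file isn't available here.
import numpy as np

# synthetic stand-in data, with feature scales roughly like the house data
rng = np.random.default_rng(0)
X_raw = rng.uniform([500, 1, 1, 47.0, -123.0], [4000, 6, 4, 48.0, -121.0], size=(1000, 5))
y = X_raw @ np.array([300.0, 1e4, 2e4, 5e5, -1e5]) + rng.normal(0, 1e4, 1000)

# standardize the features, then prepend the intercept column
X = (X_raw - X_raw.mean(axis=0)) / X_raw.std(axis=0)
X = np.c_[np.ones(len(X)), X]

n = len(y)
a = 0.1                                   # a much larger rate is safe once features are standardized
w = np.zeros(X.shape[1])
for _ in range(5000):
    grad = -2.0/n * X.T @ (y - X @ w)     # gradient of the mean squared error
    w = w - a*grad

w_closed = np.linalg.inv(X.T @ X) @ X.T @ y   # closed-form solution for comparison
print(np.round(w, 3))
print(np.round(w_closed, 3))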
I have set up joblib for parallel computing; so far, I have been able to use it to compute several metrics. I intend to compute the g-mean after the roc_auc. However, I am unable to retrieve y_test and svm_probs from the function; it gives an error when I try to retrieve them.
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.datasets import make_classification
import pandas as pd
from sklearn.model_selection import KFold
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix, f1_score
from sklearn import svm
from sklearn import datasets
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import LinearSVC
from joblib import Parallel, delayed
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer()
X = data.data
y = data.target
skf = StratifiedKFold(n_splits=5)
clf = svm.SVC(kernel='rbf', probability=True)
def train(train_index, test_index):
X_train, X_test = X[train_index], X[test_index]
y_train, y_test = y[train_index], y[test_index]
clf.fit(X_train, y_train)
r_probs = [0 for _ in range (len(y_test))]
svm_probs = clf.predict_proba(X_test)
svm_probs = svm_probs[:,1]
svm_auc = roc_auc_score(y_test, svm_probs)
return dict(svm_auc=svm_auc)
out = Parallel(n_jobs=2, verbose=100, pre_dispatch='1.5*n_jobs')(
delayed(train)(train_index, test_index) for train_index, test_index in skf.split(X, y))
svm_auc = [d['svm_auc'] for d in out]
print(np.mean(svm_auc))
rf_fpr,rf_tpr, _ = roc_curve(y_test,svm_probs)
gmeans_rf = np.sqrt(rf_tpr * (1-rf_fpr))
ix_rf = np.argmax(gmeans_rf)
print("%.3f" % gmeans_rf[ix_rf])
I have an issue with a Lorentzian fit: in OriginPro the same data fit well, but not with the code below.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from lmfit.models import LorentzianModel
datas = pd.read_csv('pg60_02_0.5V.csv')
a = datas.to_numpy()
a = a.transpose()
x= a[0]
y= a[1]
model = LorentzianModel()
pars = model.guess(y, x=x)
out = model.fit(y, pars, x=x)
print(out.fit_report())
plt.plot(x, y)
plt.plot(x, out.best_fit, '-', label='best fit')
plt.legend()
plt.show()
Here is the Plot.
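One thing that may be worth trying (a minimal sketch, assuming the spectrum sits on a non-zero baseline, which OriginPro's Lorentz function can absorb with an offset term while a bare LorentzianModel cannot): add a constant background to the model. x and y are the arrays loaded above.
import matplotlib.pyplot as plt
from lmfit.models import LorentzianModel, ConstantModel

peak = LorentzianModel()
background = ConstantModel()
model = peak + background                         # Lorentzian peak on top of a constant offset

pars = peak.guess(y, x=x)                         # initial peak parameters estimated from the data
pars.update(background.make_params(c=y.min()))    # start the offset at the data minimum

out = model.fit(y, pars, x=x)
print(out.fit_report())

plt.plot(x, y, label='data')
plt.plot(x, out.best_fit, '-', label='best fit with offset')
plt.legend()
plt.show()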
I want to make a plot with linear x- and y-axes, plus a log top x-axis showing ticks as a function of the bottom x-axis. I am unsure what to pass to the ticks, though, or whether it is more convenient to separately define the function that builds the upper log-axis ticks (something like what is done here). I would like the ticks on the upper log-axis in steps of 0.1.
This is a MWE:
from matplotlib.ticker import ScalarFormatter, FormatStrFormatter
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
fig, ax1 = plt.subplots(1, figsize=(10,6))
ax1.set_ylabel(r'y axis')
ax1.set_xlabel(r'Linear axis')
ax1.set_ylim(0.1,1.)
ax1.set_xlim(0.1,1.5)
#Upper log-axis
#I should pass something else instead of arange; I'd like the upper axis ticks in steps of 0.1 anyway
new_tick_locations = [np.log(i*1.e37/(2.*(3.809e8))) for i in np.arange(0.1, 10., 0.1)]
axup=ax1.twiny()
axup.set_xticks(new_tick_locations)
axup.set_xlabel(r'Log axis')
plt.show()
Secondary axis
Update: It turns out this is much simpler with secondary_xaxis() instead of twiny(). You can use the functions param to specify the transform and inverse functions between the bottom and top axes:
import matplotlib.pyplot as plt
import numpy as np
fig, ax1 = plt.subplots(1, figsize=(10,6))
ax1.set_ylabel('y axis')
ax1.set_xlabel('Linear axis')
ax1.set_ylim(0.1, 1.)
ax1.set_xlim(0.1e-9, 1.5e-9)
# secondary x-axis transformed with x*(a*b) and inverted with x/(a*b)
a, b = 4.*np.pi, np.float64((2.*3.086e22)**2.)
axup = ax1.secondary_xaxis('top', functions=(lambda x: x*(a*b), lambda x: x/(a*b)))
axup.set_xscale('log')
axup.set_xlabel('Log axis')
plt.show()
Original example:
# secondary x-axis transformed with x*a/b and inverted with x*b/a
ax1.set_xlim(0.1, 10.)
a, b = 1.e37, 2.*(3.809e8)
axup = ax1.secondary_xaxis('top', functions=(lambda x: x*a/b, lambda x: x*b/a))
Callback
You can use Axes callbacks to connect ax1 with axup:
[The Axes callback] events you can connect to are xlim_changed and ylim_changed and the callback will be called with func(ax) where ax is the Axes instance.
Here the ax1.xlim_changed event triggers scale_axup() to scale axup.xlim as scale(ax1.xlim). Note that I increased the xlim up to 10 to demonstrate more major ticks:
from matplotlib.ticker import LogFormatterMathtext
import matplotlib.pyplot as plt
import numpy as np
fig, ax1 = plt.subplots(1, figsize=(15,9))
# axup scaler
scale = lambda x: x*1.e37/(2.*(3.809e8))
# set axup.xlim to scale(ax1.xlim)
def scale_axup(ax1):
# mirror xlim on both axes
left, right = scale(np.array(ax1.get_xlim()))
axup.set_xlim(left, right)
# set xticks to 0.1e28 intervals
xticks = np.arange(float(f'{left:.1e}'), float(f'{right:.1e}'), 0.1e28)
axup.set_xticks([float(f'{tick:.0e}') for tick in xticks])
axup.xaxis.set_major_formatter(LogFormatterMathtext())
# redraw to update xticks
axup.figure.canvas.draw()
# connect ax1 with axup (before ax1.set_xlim())
axup = ax1.twiny()
axup.set_xscale('log')
axup.set_xlabel(r'Log axis')
ax1.callbacks.connect(r'xlim_changed', scale_axup)
ax1.set_ylabel(r'y axis')
ax1.set_xlabel(r'Linear axis')
ax1.set_ylim(0.1, 1.)
ax1.set_xlim(0.1, 10.)
plt.show()
Following the answer you shared, I modified the code to fit your needs.
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
from matplotlib.ticker import StrMethodFormatter
fig, ax1 = plt.subplots(1, figsize=(10,6))
ax1.set_ylabel(r'y axis')
ax1.set_xlabel(r'Linear axis')
ax1.set_xlim(0.1,1.5)
#Upper log-axis
def tick_function(x):
v = np.log(x*1.e37/(2.*(3.809e8)))
return ["%.1f" % z for z in v]
axup_locations = np.arange(0.1, 10., 0.1)
axup=ax1.twiny()
axup.set_xscale('log')
axup.set_xlim(0.1,100)
axup.set_yscale('linear')
axup.xaxis.set_major_formatter(StrMethodFormatter('{x:.0f}'))
axup.set_xlabel(r'Log axis')
plt.show()
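Note that the snippet above defines tick_function and axup_locations but never applies them, so the top axis is not yet tied to the bottom one. A minimal sketch of one way to wire them up with twiny(), assuming the top axis should simply relabel bottom-axis positions (in steps of 0.1) with the transformed values:
import matplotlib.pyplot as plt
import numpy as np

fig, ax1 = plt.subplots(1, figsize=(10, 6))
ax1.set_ylabel(r'y axis')
ax1.set_xlabel(r'Linear axis')
ax1.set_xlim(0.1, 1.5)

def tick_function(x):
    # bottom-axis values transformed to the quantity shown on the top axis
    v = np.log(x*1.e37/(2.*(3.809e8)))
    return ["%.1f" % z for z in v]

axup_locations = np.arange(0.1, 1.6, 0.1)              # steps of 0.1 over the visible range

axup = ax1.twiny()
axup.set_xlim(ax1.get_xlim())                          # keep top and bottom axes aligned
axup.set_xticks(axup_locations)                        # ticks at the bottom-axis positions
axup.set_xticklabels(tick_function(axup_locations))    # labelled with the transformed values
axup.set_xlabel(r'Log axis')
plt.show()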
Is there a way to extract scalar summaries to CSV (preferably from within tensorboard) from tfevents files?
Example code
The following code generates tfevent files in a summary_dir within the same directory. Suppose you let it run and you find something interesting. You want to get the raw data for further investigation. How would you do that?
#!/usr/bin/env python
"""A very simple MNIST classifier."""
import argparse
import sys
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
ce_with_logits = tf.nn.softmax_cross_entropy_with_logits
FLAGS = None
def inference(x):
"""
Build the inference graph.
Parameters
----------
x : placeholder
Returns
-------
Output tensor with the computed logits.
"""
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.matmul(x, W) + b
return y
def loss(logits, labels):
"""
Calculate the loss from the logits and the labels.
Parameters
----------
logits : Logits tensor, float - [batch_size, NUM_CLASSES].
labels : Labels tensor, int32 - [batch_size]
"""
cross_entropy = tf.reduce_mean(ce_with_logits(labels=labels,
logits=logits))
return cross_entropy
def training(loss, learning_rate=0.5):
"""
Set up the training Ops.
Parameters
----------
loss : Loss tensor, from loss().
learning_rate : The learning rate to use for gradient descent.
Returns
-------
train_op: The Op for training.
"""
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_step = optimizer.minimize(loss)
return train_step
def main(_):
# Import data
mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
# Create the model
x = tf.placeholder(tf.float32, [None, 784])
y = inference(x)
# Define loss and optimizer
y_ = tf.placeholder(tf.float32, [None, 10])
loss_ = loss(logits=y, labels=y_)
train_step = training(loss_)
# Test trained model
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
with tf.name_scope('accuracy'):
tf.summary.scalar('accuracy', accuracy)
merged = tf.summary.merge_all()
sess = tf.InteractiveSession()
train_writer = tf.summary.FileWriter('summary_dir/train', sess.graph)
test_writer = tf.summary.FileWriter('summary_dir/test', sess.graph)
tf.global_variables_initializer().run()
for train_step_i in range(100000):
if train_step_i % 100 == 0:
summary, acc = sess.run([merged, accuracy],
feed_dict={x: mnist.test.images,
y_: mnist.test.labels})
test_writer.add_summary(summary, train_step_i)
summary, acc = sess.run([merged, accuracy],
feed_dict={x: mnist.train.images,
y_: mnist.train.labels})
train_writer.add_summary(summary, train_step_i)
batch_xs, batch_ys = mnist.train.next_batch(100)
sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
print(sess.run(accuracy, feed_dict={x: mnist.test.images,
y_: mnist.test.labels}))
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--data_dir',
type=str,
default='/tmp/tensorflow/mnist/input_data',
help='Directory for storing input data')
FLAGS, unparsed = parser.parse_known_args()
tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
While the answer here is, as requested, within TensorBoard, it only allows downloading a CSV for a single run of a single tag.
If you have, for example, 10 tags and 20 runs (which is not much at all), you would need to do the above step 200 times (that alone will probably take you more than an hour).
If you then want to actually do something with the data for all runs of a single tag, you would need to write some awkward CSV accumulation script or copy everything by hand (which will probably cost you more than a day).
Therefore I would like to add a solution that extracts a CSV file for every tag, with all runs contained. Column headers are the run path names and row indices are the run step numbers.
import os
import numpy as np
import pandas as pd
from collections import defaultdict
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
def tabulate_events(dpath):
summary_iterators = [EventAccumulator(os.path.join(dpath, dname)).Reload() for dname in os.listdir(dpath)]
tags = summary_iterators[0].Tags()['scalars']
for it in summary_iterators:
assert it.Tags()['scalars'] == tags
out = defaultdict(list)
steps = []
for tag in tags:
steps = [e.step for e in summary_iterators[0].Scalars(tag)]
for events in zip(*[acc.Scalars(tag) for acc in summary_iterators]):
assert len(set(e.step for e in events)) == 1
out[tag].append([e.value for e in events])
return out, steps
def to_csv(dpath):
dirs = os.listdir(dpath)
d, steps = tabulate_events(dpath)
tags, values = zip(*d.items())
np_values = np.array(values)
for index, tag in enumerate(tags):
df = pd.DataFrame(np_values[index], index=steps, columns=dirs)
df.to_csv(get_file_path(dpath, tag))
def get_file_path(dpath, tag):
file_name = tag.replace("/", "_") + '.csv'
folder_path = os.path.join(dpath, 'csv')
if not os.path.exists(folder_path):
os.makedirs(folder_path)
return os.path.join(folder_path, file_name)
if __name__ == '__main__':
path = "path_to_your_summaries"
to_csv(path)
My solution builds upon: https://stackoverflow.com/a/48774926/2230045
EDIT:
I created a more sophisticated version and released it on GitHub: https://github.com/Spenhouet/tensorboard-aggregator
This version aggregates multiple tensorboard runs and is able to save the aggregates to a new tensorboard summary or as a .csv file.
Just check the "Data download links" option on the upper-left in TensorBoard, and then click on the "CSV" button that will appear under your scalar summary.
Here is my solution, which builds on the previous solutions but can scale up.
import os
import numpy as np
import pandas as pd
from collections import defaultdict
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
def tabulate_events(dpath):
    final_out = {}
    for dname in os.listdir(dpath):
        print(f"Converting run {dname}", end="")
        ea = EventAccumulator(os.path.join(dpath, dname)).Reload()
        tags = ea.Tags()['scalars']

        out = {}
        for tag in tags:
            tag_values = []
            wall_time = []
            steps = []
            for event in ea.Scalars(tag):
                tag_values.append(event.value)
                wall_time.append(event.wall_time)
                steps.append(event.step)
            out[tag] = pd.DataFrame(data=dict(zip(steps, np.array([tag_values, wall_time]).transpose())),
                                    columns=steps, index=['value', 'wall_time'])

        if len(tags) > 0:
            df = pd.concat(out.values(), keys=out.keys())
            df.to_csv(f'{dname}.csv')
            print("- Done")
            # only keep runs that actually contained scalar tags
            final_out[dname] = df
        else:
            print('- No scalars to write')
    return final_out
if __name__ == '__main__':
    path = "your/path/here"
    steps = tabulate_events(path)
    pd.concat(steps.values(), keys=steps.keys()).to_csv('all_result.csv')
Very minimal example:
import pandas as pd
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
log_dir = "lightning_logs/version_1"
event_accumulator = EventAccumulator(log_dir)
event_accumulator.Reload()
events = event_accumulator.Scalars("train_loss")
x = [x.step for x in events]
y = [x.value for x in events]
df = pd.DataFrame({"step": x, "train_loss": y})
df.to_csv("train_loss.csv")
print(df)
step train_loss
0 0 700.491516
1 1 163.593246
2 2 146.365448
3 3 153.830215
...
Plotting loss vs epochs example:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
log_dir = "lightning_logs/version_1"
y_key = "val_loss"
event_accumulator = EventAccumulator(log_dir)
event_accumulator.Reload()
steps = {x.step for x in event_accumulator.Scalars("epoch")}
x = list(range(len(steps)))
y = [x.value for x in event_accumulator.Scalars(y_key) if x.step in steps]
df = pd.DataFrame({"epoch": x, y_key: y})
df.to_csv(f"{y_key}.csv")
fig, ax = plt.subplots()
sns.lineplot(data=df, x="epoch", y=y_key)
fig.savefig("plot.png", dpi=300)
Just to add to @Spen's answer: in case you want to export the data when you have varying numbers of steps, this will make one large CSV file.
You might need to change the keys around for it to work for you.
import os
import numpy as np
import pandas as pd
from collections import defaultdict
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
import glob
listOutput = (glob.glob("*/"))
listDF = []
for tb_output_folder in listOutput:
print(tb_output_folder)
x = EventAccumulator(path=tb_output_folder)
x.Reload()
x.FirstEventTimestamp()
keys = ['loss', 'mean_absolute_error', 'val_loss', 'val_mean_absolute_error']
listValues = {}
steps = [e.step for e in x.Scalars(keys[0])]
wall_time = [e.wall_time for e in x.Scalars(keys[0])]
index = [e.index for e in x.Scalars(keys[0])]
count = [e.count for e in x.Scalars(keys[0])]
n_steps = len(steps)
listRun = [tb_output_folder] * n_steps
printOutDict = {}
data = np.zeros((n_steps, len(keys)))
for i in range(len(keys)):
data[:,i] = [e.value for e in x.Scalars(keys[i])]
printOutDict = {keys[0]: data[:,0], keys[1]: data[:,1],keys[2]: data[:,2],keys[3]: data[:,3]}
printOutDict['Name'] = listRun
DF = pd.DataFrame(data=printOutDict)
listDF.append(DF)
df = pd.concat(listDF)
df.to_csv('Output.csv')