Regarding Implementation of Gradient Descent for Polynomial Regression

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy import stats
from numpy.linalg import inv
import seaborn as sns
url = r'C:\Users\pchan\kc_house_train_data.csv'
df = pd.read_csv(url,index_col=0)
features_1 = ['sqft_living', 'bedrooms', 'bathrooms', 'lat', 'long']
x=df.filter(features_1)
x = np.c_[np.ones((x.shape[0], 1)), x]
x=pd.DataFrame(x)
y=df.filter(['price'])
y=y.reset_index(drop=True)
x_new=x.T
y.rename(columns = {'price':0}, inplace = True)
w=pd.DataFrame([0]*(x_new.shape[0]))
cost=[]
i=0
a=0.00001
while i < 50:
    temp = x.T @ (y - x @ w)  # gradient direction: X^T (y - Xw)
    w = w + (a * temp)
    i += 1
print(w)
from sklearn.linear_model import LinearRegression
reg=LinearRegression().fit(x,y)
res=reg.coef_
print(res)
w_closed = np.linalg.inv(x.T @ x) @ x.T @ y
print(w_closed)
The closed-form solution and sklearn's LinearRegression both recover the correct weights, but the gradient descent approach (using matrix notation) does not. What is wrong with the gradient descent approach here?
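For comparison, a minimal sketch (not the asker's exact pipeline) of batch gradient descent on plain NumPy arrays with standardized features; the step size and iteration count are illustrative. With raw features on very different scales (sqft_living vs. lat/long), a fixed step size diverges easily, which is often the reason the matrix-notation update fails while the closed form succeeds.
import numpy as np

def gradient_descent(X, y, lr=0.01, n_iter=1000):
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    # Standardize the columns so one learning rate works for all features
    Xs = np.c_[np.ones(len(X)), (X - X.mean(axis=0)) / X.std(axis=0)]
    w = np.zeros((Xs.shape[1], 1))
    for _ in range(n_iter):
        grad = -2 * Xs.T @ (y - Xs @ w) / len(y)  # gradient of the mean squared error
        w -= lr * grad
    return w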

Related

Facing an issue while fitting my model (bi-LSTM + CRF): ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type list)

I am trying to solve a problem that involves a bi-LSTM and a CRF. While fitting the model, I get this error: ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type list). The structure of the dataframe is as follows.
A column "CompanyId" containing integers, "Name" containing strings, "TableTypeCode" containing a constant string always equal to "BS", and a final column "BlockName". I want to train a model using a bidirectional LSTM and a CRF, with "CompanyId", "Name", and "TableTypeCode" as inputs, predicting "BlockName".
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.layers import Input, Embedding, LSTM, Dense, TimeDistributed, Bidirectional
from keras.models import Model
!pip install tensorflow-addons==0.16.1
import tensorflow_addons as tfa
df = pd.read_excel("data.xlsx")
X = df[['CompanyId', 'Name', 'TableTypeCode']]
y = df['BlockName']
# Preprocess the data
# One-hot encode the 'CompanyId' and 'TableTypeCode' columns
X = pd.get_dummies(X, columns=['CompanyId', 'TableTypeCode'])
# Tokenize the 'Name' column
X['Name'] = X['Name'].apply(str)
tokenizer = Tokenizer()
X['Name'] = X['Name'].apply(lambda x: x.split())
X['Name'] = tokenizer.texts_to_sequences(X['Name'])
# Encode the target column
encoder = LabelEncoder()
y = encoder.fit_transform(y)
y = to_categorical(y)
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
n_classes = df['BlockName'].nunique()
# Define the model architecture
input_ = Input(shape=(X.shape[1],))
embedding = Embedding(input_dim=X.shape[1], output_dim=50)(input_)
lstm = Bidirectional(LSTM(units=100))(embedding)
output = Dense(n_classes, activation='softmax')(lstm)
model = Model(input_, output)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# Train the model
model.fit(X_train, y_train)
There was no issue until the last line of code. Help me fix this and train my model.
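A minimal sketch of the usual fix, assuming the ValueError comes from the 'Name' column holding variable-length Python lists: fit the tokenizer first, then pad the sequences to one fixed length (maxlen=20 here is arbitrary) so the inputs become a plain numeric matrix.
import numpy as np
import pandas as pd
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences  # keras.utils.pad_sequences in newer versions

names = df['Name'].astype(str).tolist()
tokenizer = Tokenizer()
tokenizer.fit_on_texts(names)                    # fit before calling texts_to_sequences
name_seqs = tokenizer.texts_to_sequences(names)  # ragged list of lists
name_mat = pad_sequences(name_seqs, maxlen=20, padding='post')  # shape (n_samples, 20)

# Stack with the one-hot encoded columns into a single float32 array
other = pd.get_dummies(df[['CompanyId', 'TableTypeCode']],
                       columns=['CompanyId', 'TableTypeCode']).to_numpy()
X_fixed = np.hstack([name_mat, other]).astype('float32')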

Access variables from a parallel joblib job

I have set up joblib for parallel computing and, so far, I have been able to use it to compute several metrics. I intend to compute the g-mean after the ROC AUC, but I am unable to retrieve y_test and svm_probs from the function; it gives an error when I try to use them.
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.datasets import make_classification
import pandas as pd
from sklearn.model_selection import KFold
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix, f1_score
from sklearn import svm
from sklearn import datasets
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import LinearSVC
from joblib import Parallel, delayed
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer()
X = data.data
y = data.target
skf = StratifiedKFold(n_splits=5)
clf = svm.SVC(kernel='rbf', probability=True)
def train(train_index, test_index):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clf.fit(X_train, y_train)
    r_probs = [0 for _ in range(len(y_test))]
    svm_probs = clf.predict_proba(X_test)
    svm_probs = svm_probs[:, 1]
    svm_auc = roc_auc_score(y_test, svm_probs)
    return dict(svm_auc=svm_auc)

out = Parallel(n_jobs=2, verbose=100, pre_dispatch='1.5*n_jobs')(
    delayed(train)(train_index, test_index) for train_index, test_index in skf.split(X, y))
svm_auc = [d['svm_auc'] for d in out]
print(np.mean(svm_auc))
# The lines below fail: y_test and svm_probs are local to train()
rf_fpr, rf_tpr, _ = roc_curve(y_test, svm_probs)
gmeans_rf = np.sqrt(rf_tpr * (1 - rf_fpr))
ix_rf = np.argmax(gmeans_rf)
print("%.3f" % gmeans_rf[ix_rf])

How to remove the lines drawn across the discontinuities of a function

I'm plotting a function that has several discontinuities; the function is given below. I want to connect points with lines only where the function is continuous.
Here is a simplified example of what the plot is doing.
import numpy as np
from math import sqrt
import matplotlib.pyplot as plt
from scipy.special import jv, kn
a = sqrt(300 - 1)
x = np.linspace(0, 0.2, 500)
J0 = jv(0, a*x)
J1 = jv(1, a*x)
K0 = kn(0,x)
K1 = kn(1,x)
Y2=a*x*(J1/J0)
Y3=x*(K1/K0)
plt.xlabel('x')
plt.ylabel('y')
plt.ylim(-10,10)
plt.axhline(0, color='black')
plt.plot(x,Y2)
plt.plot(x,Y3)
plt.show()
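A minimal sketch of one common approach, assuming a jump larger than some threshold marks a discontinuity: replace those samples with NaN so matplotlib breaks the line there instead of drawing a near-vertical segment across the asymptote (the threshold of 10 is arbitrary).
def mask_jumps(y, threshold=10.0):
    y = np.asarray(y, dtype=float).copy()
    jumps = np.abs(np.diff(y)) > threshold  # samples where the curve jumps sharply
    y[1:][jumps] = np.nan                   # NaN breaks the plotted line
    return y

plt.ylim(-10, 10)
plt.axhline(0, color='black')
plt.plot(x, mask_jumps(Y2))
plt.plot(x, mask_jumps(Y3))
plt.show()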

Lorentzian fitting not guessing well enough

I have an issue with fitting a Lorentzian to my data; in OriginPro the same data fits well.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from lmfit.models import LorentzianModel
datas = pd.read_csv('pg60_02_0.5V.csv')
a = datas.to_numpy()
a = a.transpose()
x= a[0]
y= a[1]
model = LorentzianModel()
pars = model.guess(y, x=x)
out = model.fit(y, pars, x=x)
print(out.fit_report())
plt.plot(x, y)
plt.plot(x, out.best_fit, '-', label='best fit')
plt.legend()
plt.show()
Here is the Plot.
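A minimal sketch of one thing to try (an assumption about why guess() struggles): if the peak sits on a non-zero baseline, a pure Lorentzian guess can be far off, so add a constant background and seed the center by hand.
from lmfit.models import LorentzianModel, ConstantModel

peak = LorentzianModel(prefix='lz_')
bkg = ConstantModel(prefix='c_')
model = peak + bkg

pars = peak.guess(y - y.min(), x=x)           # guess on baseline-subtracted data
pars.update(bkg.make_params(c=y.min()))       # start the background at the data minimum
pars['lz_center'].set(value=x[np.argmax(y)])  # seed the center at the tallest point

out = model.fit(y, pars, x=x)
print(out.fit_report())
plt.plot(x, y, label='data')
plt.plot(x, out.best_fit, '-', label='best fit')
plt.legend()
plt.show()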

Auto-encoder result dimensions are incorrect

Using the code below, I'm attempting to encode images from MNIST into a lower-dimensional representation:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
from ast import literal_eval
from sklearn import metrics
from sklearn.preprocessing import MultiLabelBinarizer, scale
import seaborn as sns
sns.set_style("darkgrid")
import torch
import torchvision
import torch.nn as nn
from torch.autograd import Variable
%matplotlib inline
low_dim_rep = 32
epochs = 2
cuda = torch.cuda.is_available() # True if cuda is available, False otherwise
FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
print('Training on %s' % ('GPU' if cuda else 'CPU'))
# Loading the MNIST data set
transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize((0.1307,), (0.3081,))])
mnist = torchvision.datasets.MNIST(root='../data/', train=True, transform=transform, download=True)
# Loader to feed the data batch by batch during training.
batch = 100
data_loader = torch.utils.data.DataLoader(mnist, batch_size=batch, shuffle=True)
encoder = nn.Sequential(
# Encoder
nn.Linear(28 * 28, 64),
nn.PReLU(64),
nn.BatchNorm1d(64),
# Low-dimensional representation
nn.Linear(64, low_dim_rep),
nn.PReLU(low_dim_rep),
nn.BatchNorm1d(low_dim_rep))
decoder = nn.Sequential(
# Decoder
nn.Linear(low_dim_rep, 64),
nn.PReLU(64),
nn.BatchNorm1d(64),
nn.Linear(64, 28 * 28))
autoencoder = nn.Sequential(encoder, decoder)
encoder = encoder.type(FloatTensor)
decoder = decoder.type(FloatTensor)
autoencoder = autoencoder.type(FloatTensor)
optimizer = torch.optim.Adam(params=autoencoder.parameters(), lr=0.00001)
data_size = int(mnist.train_labels.size()[0])
print('data_size' , data_size)
for i in range(epochs):
    for j, (images, _) in enumerate(data_loader):
        images = images.view(images.size(0), -1)  # from (batch, 1, 28, 28) to (batch, 784)
        images = Variable(images).type(FloatTensor)
        autoencoder.zero_grad()
        reconstructions = autoencoder(images)
        loss = torch.dist(images, reconstructions)
        loss.backward()
        optimizer.step()
    print('Epoch %i/%i loss %.2f' % (i + 1, epochs, loss.item()))
print('Optimization finished.')
# Get the encoded images here
encoded_images = []
for j, (images, _) in enumerate(data_loader):
    images = images.view(images.size(0), -1)
    images = Variable(images).type(FloatTensor)
    encoded_images.append(encoder(images))
Upon completion of this code, len(encoded_images) is 600, while I expect the length to match the number of images in MNIST: len(mnist) = 60,000.
How do I encode the images into a lower-dimensional representation of size 32 (low_dim_rep = 32)? Have I defined the network parameters incorrectly?
You have 60,000 images in MNIST and your batch = 100. That is why len(encoded_images) is 600: you perform 60000 / 100 = 600 iterations when generating the encoded images, and you end up with a list of 600 elements, each of shape [100, 32]. You can do the following instead:
encoded_images = torch.zeros(len(mnist), 32)
for j, (images, _) in enumerate(data_loader):
    images = images.view(images.size(0), -1)
    images = Variable(images).type(FloatTensor)
    encoded_images[j * batch : (j + 1) * batch] = encoder(images)
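As a follow-up, an equivalent sketch that concatenates the batches with torch.cat; wrapping it in torch.no_grad() and switching the encoder to eval() avoids tracking gradients and keeps BatchNorm in inference mode.
encoder.eval()
with torch.no_grad():
    encoded_images = torch.cat(
        [encoder(imgs.view(imgs.size(0), -1).type(FloatTensor))
         for imgs, _ in data_loader],
        dim=0)  # shape: (len(mnist), low_dim_rep)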