Load completely with request in python (or other ways)

Load completely with request in python (or other ways) - json

Hi
I was wondering if I can load the page completely with python, for example, a hashtag page form Instagram
there is code I tried but it wouldn't load completely
Here's my code
import json
import re
import requests
x = input("Enter your hashtag: ")
response = requests.get('https://www.instagram.com/explore/tags/' + x + '/?__a=1')
if response.status_code == 404:
print('page not found')
input()
exit()
data = response.text
x = re.findall("\"shortcode\":\"[^\"][^\"][^\"][^\"][^\"][^\"][^\"][^\"][^\"][^\"][^\"][^\,]", data)
y = [i.split('"')[3] for i in x]
x = 0
z = len(y)
print(str(z)+' Posts found')
while x < z:
print('\r' + str(x) + ' posts done', end="")
data = requests.get('https://www.instagram.com/p/' + y[x] + '/?__a=1')
y[x] = data.text
x = x + 1
print()
print('post link finished')
Usernames = []
Posts = []
Followers = []
Following = []
x = 0
while x < z:
print('\r' + str(x) + ' Usernames done' , end="")
data = json.loads(y[x])
Usernames.append(data['graphql']['shortcode_media']['owner']['username'])
x = x + 1
print()
print('Usernames finished')
print(len(Usernames))
I want to have more usernames like 100k or more if you can help me with other libraries it isn't important

Related

No results for the web-scrapping from autoscout24

I have a code to retreive data from autoscout24.com for my thesis which I will be working on used car analytics. However, I cannot retreive data and loops do not end. I do not understand the reason behind it. Can anyone help me? Here is the code.
brand = \[\]
model = \[\]
price = \[\]
total = \[\]
a = 101
k = 100
l = 20000 # calculates between € 0 - € 2.000.000
j = 1
while j \<= l:
i = 1
website = 'https://www.autoscout24.com/lst?sort=price&desc=0&cy=NL&atype=C&ustate=N%2CU&damaged_listing=exclude&powertype=kw&pricefrom=' + str(a) + '&priceto=' + str(a+k-1) + '&search_id=2gfy6suaasl&page='
a = a + k
j = j + 1
while i <= 20:
website = website + str(i)
response = requests.get(website)
soup = BeautifulSoup(response.content, 'html.parser')
results = soup.find_all('div', {'class' : 'ListItem_wrapper__J_a_C'})
i = i + 1
for result in results:
brand.append(result.find('h2').get_text())
model.append(result.find('span', {'class':'ListItem_version__jNjur'}).get_text())
price.append(result.find('p', {'class':'Price_price__WZayw'}).get_text().strip())
total.append(result.find('div', {'class':'VehicleDetailTable_container__mUUbY'}).get_text())
I used my computer, I used Google CoLab; however, I could not reach anything at all. If you do this manually, you can reach the data; but no results if you do it in a loop.

Why is this error occurring when I use writerow?

Why am I getting this error when I try to use writerow
db.writerow(loanList[i])
TypeError: writerows() argument must be iterable
'''
import csv
header = ['Name','Results']
file = open("loanResults.csv", "w", newline = "")
a = 10000
b = [0.032,0.043,0.037,0.043,0.044,0.029,0.028,0.030]
c = [6,7,4,3,4,6,7,9,]
loanList = []
def monthlyRepayment(principalAmount,interest,year):
for i in range(len(b)):
finalAmount = principalAmount * ((1 + interest[i]) **year[i])
monthlyAmount = (finalAmount / (year[i]*12))
loanList.append(round(monthlyAmount))
print(round(monthlyAmount))
testCase1 = monthlyRepayment(a,b,c)
db = csv.writer(file)
db.writerow(header)
for i in range(len(loanList)):
db.writerows(loanList[i])
file.close()
'''

wanting to write a txt with Tkinter entry box

just trying to allow users to write in their name and email address for it to be then written into a text file. There are no error messages that pop up it's just, it isn't writing into the file. also, the message box isn't coming up with (+ aname + '\n' + full email + '\n') it just comes up with the message. Cheers
import tkinter as tk #shortening tkinter
import tkinter.messagebox as box
import csv
from tkinter import *
def store_customers():
aname = name.get()
aemail = email.get()
aemailaddress = emailaddress.get()
fullemail = aemail + aemailaddress
print(fullemail)
if (name == "" or email == ""):
print('Error')
messagebox.showerror('error',"their was some issur with your information")
email.set('')
name.set('')
else:
result = messagebox.askquestion('question', 'Your about to enter yor information \n' + aname + '\n' + fullemail + '\n' )
if (result == 'yes'):
print('here')
with open ('customersdata.txt', 'a') as csvfile:
writer = csv.writer(csvfile)
writer.writerow([aname, fullemail])
csvfile.close()
else:
name.set('')
email.set('')
name = StringVar()
email = StringVar()
emailaddress = StringVar()
name = tk.Entry(frame4, text="", bg = '#F0EAD6', font=('Arial',24) )
name.place(x= 400, y = 600)
email = tk.Entry(frame4, text="", bg = '#F0EAD6', font=('Arial',24) )
email.place(x= 400, y = 660)
list1 = ['#yahoo.com','#bing.com','#jpc.vic.edu.au', '#gmail.com', '#hotmail.com' ]
emailaddres= OptionMenu(frame4,emailaddress,*list1)
emailaddres.config(height = 2 )
emailaddress.set('#***.***.edu.au')
emailaddres.place(x= 685, y= 660)
storebtn = tk.Button(frame4, text = 'complete', bg= '#F0EAD6', font=('Arial',24), command = store_customers)
storebtn.place (x = 430, y= 700)
tk.Label(frame4, text= "Your name", bg= '#F0EAD6', font=('Arial',24)).place(x = 260, y = 600)
tk.Label(frame4, text= "email address", bg= '#F0EAD6', font=('Arial',24)).place(x = 240, y = 660)
frame4.mainloop()

Code with Tkinter.
import csv
import tkinter
import tkinter.messagebox
def save():
nome_to_save = namevalue.get()
email_to_save = emailvalue.get()
if len(nome_to_save) == 0 or len(email_to_save) == 0:
tkinter.messagebox.showinfo('Error', 'You need to enter name and e-mail!')
else:
with open('customersdata.csv', 'a', newline='') as csvfile:
writer = csv.writer(csvfile)
writer.writerow([nome_to_save, email_to_save])
csvfile.close()
tkinter.messagebox.showinfo('Success', f'Information of {nome_to_save} saved!')
window = tkinter.Tk()
window.title("Write CSV")
namevalue = tkinter.StringVar()
emailvalue = tkinter.StringVar()
tkinter.Label(window, text = "Name").grid(row = 0)
name = tkinter.Entry(window, textvariable=namevalue).grid(row = 0, column = 1)
tkinter.Label(window, text = "E-mail").grid(row = 1)
email = tkinter.Entry(window, textvariable=emailvalue).grid(row = 1, column = 1)
tkinter.Button(window, text="Save", command = save).grid(row = 2)
tkinter.mainloop()

You probably receive some error, right?
You have imported tkinter.messagebox as box.
So replace messagebox.showerror and messagebox.askquestion with box.showerror and box.askquestion
PS: I am new here, so I cannot comment. Sorry!

This code can help you.
import csv
def store_customers(name, email):
if name == "" or email == "":
print('Error, i need name and email')
else:
with open('customersdata.csv', 'a', newline='') as csvfile:
writer = csv.writer(csvfile)
writer.writerow([name, email])
csvfile.close()
#store_customers('', '')
store_customers('Diego', 'diego#false.io')
store_customers('Ana Maria', 'ana.maria#myemail.io')
store_customers('Julia Neau', 'jneau#cool.io')

handwriting synthesis alex graves

I have been trying to replicate the alex graves handwriting synthesis model, and I did this with tensorflow, and python on a 1080Ti GPU with cuda,
I exactly replicated all of the features explained in the paper and even clipped the respective gradient values in place, but I have real difficulty training it.
I also preproccessed the data in the way explained in the paper, including normalizing the X and y offsets, but the problem is that the training usually can't lower the negative log likelihood more than 1000 which in the paper it reaches -1000, and after that i see NaN weights.
The only extra thing I did was to add 0.0000001 to the conditional probability of every stroke to prevent NaN values in log likelihood.
Any tips or suggestions or experience with such a task?
this is the cell code I use,
class Custom_Cell(RNNCell):
def __init__(self,forget_bias,bias,one_hot_vector, hidden_layer_nums=[700,700,700], mixture_num=10, attention_num=4):
self.bias = bias
self.lstms = []
for i in hidden_layer_nums:
self.lstms.append(LSTMCell(num_units=i, initializer=tf.truncated_normal_initializer(0.075), dtype=tf.float32, forget_bias=forget_bias))
self.attention_num = attention_num
self.mixture_num = mixture_num
self.state_size = 2*sum(hidden_layer_nums) + 3*self.attention_num
self.attention_var_num = 3*self.attention_num
self.output_size = 6*self.mixture_num + 1 + 1
self.one_hot_vector = one_hot_vector
self.lstm_num = len(hidden_layer_nums)
self.hidden_layer_nums = hidden_layer_nums
temp_shape = self.one_hot_vector.shape
self.char_num = temp_shape[2]
self.i_to_h = []
self.w_to_h = []
self.h_to_h = []
self.prev_h_to_h = []
self.lstm_bias = []
self.lstm_to_attention_weights = tf.get_variable("lstms/first_to_attention_mtrx",shape=[hidden_layer_nums[0],self.attention_var_num],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True)
self.lstm_to_attention_bias = tf.get_variable("lstms/first_to_attention_bias",shape=[self.attention_var_num],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True)
self.all_to_output_mtrx = []
for i in range(self.lstm_num):
self.all_to_output_mtrx.append( tf.get_variable("lstms/to_output_mtrx_" + str(i), shape=[hidden_layer_nums[i],self.output_size-1],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True))
self.all_to_output_bias = tf.get_variable("lstms/output_bias",shape=[self.output_size-1],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True)
for i in range(self.lstm_num):
self.i_to_h.append(tf.get_variable("lstms/i_to_h_"+str(i),shape=[3,hidden_layer_nums[i]],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True))
self.w_to_h.append(tf.get_variable("lstms/w_to_h_"+str(i),shape=[self.char_num,hidden_layer_nums[i]],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True))
self.h_to_h.append(tf.get_variable("lstms/h_to_h_"+str(i),shape=[hidden_layer_nums[i],hidden_layer_nums[i]],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True))
self.lstm_bias.append(tf.get_variable("lstms/bias_" + str(i),shape=[hidden_layer_nums[i]],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True))
if not i == 0:
self.prev_h_to_h.append(
tf.get_variable("lstms/prev_h_to_h_" + str(i), shape=[hidden_layer_nums[i-1], hidden_layer_nums[i]],
dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075),
trainable=True))
def __call__(self, inputs, state, scope=None):
# Extracting previous configuration and vectors
splitarray = []
for i in self.hidden_layer_nums:
splitarray.append(i)
splitarray.append(i)
splitarray.append(3*self.attention_num)
splitted = tf.split(state,splitarray,axis=1)
prev_tuples = []
for i in range(self.lstm_num):
newtuple = LSTMStateTuple(splitted[2*i],splitted[2*i + 1])
prev_tuples.append(newtuple)
prev_attention_vec = splitted[2*self.lstm_num]
new_attention_vec = 0
next_states = []
most_attended = 0
last_output = 0
for i in range(self.lstm_num):
prev_c, prev_h = prev_tuples[i]
cell = self.lstms[i]
if i == 0:
with tf.name_scope("layer_1"):
w, most_attended = self.gaussian_attention(self.one_hot_vector,prev_attention_vec)
input_vec = tf.matmul(inputs,self.i_to_h[0]) + tf.matmul(prev_h,self.h_to_h[0]) + tf.matmul(w,self.w_to_h[0]) + self.lstm_bias[0]
_, new_state = cell(input_vec, prev_tuples[0])
new_c, new_h = new_state
next_states.append(new_c)
next_states.append(new_h)
last_output = tf.matmul(new_h,self.all_to_output_mtrx[0])
with tf.name_scope("attention_layer"):
temp_attention = tf.matmul(new_h,self.lstm_to_attention_weights) + self.lstm_to_attention_bias
new_alpha, new_beta, new_kappa = tf.split(temp_attention,[self.attention_num,self.attention_num,self.attention_num],axis=1)
old_alpha, old_beta, old_kappa = tf.split(prev_attention_vec,[self.attention_num,self.attention_num,self.attention_num], axis=1)
new_alpha = tf.exp(new_alpha)
new_beta = tf.exp(new_beta)
new_kappa = tf.exp(new_kappa) + old_kappa
new_attention_vec = tf.concat([new_alpha,new_beta,new_kappa],axis=1)
else:
with tf.name_scope("layer_" + str(i)):
w, most_attended = self.gaussian_attention(self.one_hot_vector,new_attention_vec)
input_vec = tf.matmul(inputs,self.i_to_h[i]) + tf.matmul(next_states[-1],self.prev_h_to_h[i-1]) + tf.matmul(prev_h,self.h_to_h[i]) + tf.matmul(w,self.w_to_h[i]) + self.lstm_bias[i]
_,new_state = cell(input_vec,prev_tuples[i])
new_c, new_h = new_state
next_states.append(new_c)
next_states.append(new_h)
last_output = last_output + tf.matmul(new_h, self.all_to_output_mtrx[i])
with tf.name_scope("output"):
last_output = last_output + self.all_to_output_bias
next_states.append(new_attention_vec)
state_to_return = tf.concat(next_states,axis=1)
output_split_param = [1,self.mixture_num,2*self.mixture_num,2*self.mixture_num,self.mixture_num]
binomial_param, pi, mu, sigma, rho = tf.split(last_output,output_split_param,axis=1)
binomial_param = tf.divide(1.,1.+tf.exp(binomial_param))
pi = tf.nn.softmax(tf.multiply(pi,1.+self.bias),axis=1)
mu = mu
sigma = tf.exp(sigma-self.bias)
rho = tf.tanh(rho)
output_to_return = tf.concat([most_attended, binomial_param, pi, mu, sigma, rho],axis=1)
return output_to_return, state_to_return
def state_size(self):
return self.state_size
def output_size(self):
return self.output_size
def gaussian_attention(self,sequence,params):
with tf.name_scope("attention_calculation"):
alpha, beta, kappa = tf.split(params,[self.attention_num,self.attention_num,self.attention_num],axis=1)
seq_shape = sequence.shape
seq_length = seq_shape[1]
temp_vec = 20*np.asarray(range(seq_length),dtype=float)
final_result = 0
alpha = tf.split(alpha,self.attention_num,1)
beta = tf.split(beta,self.attention_num,1)
kappa = tf.split(kappa,self.attention_num,1)
for i in range(self.attention_num):
alpha_now = alpha[i]
beta_now = beta[i]
kappa_now = kappa[i]
result = kappa_now - temp_vec
result = tf.multiply(tf.square(result),tf.negative(beta_now))
result = tf.multiply(tf.exp(result),alpha_now)
final_result = final_result+result
most_attended = tf.argmax(final_result,axis=1)
most_attended = tf.reshape(tf.cast(most_attended,dtype=tf.float32),shape=[-1,1])
final_result = tf.tile(tf.reshape(final_result,[-1,seq_shape[1],1]),[1,1,seq_shape[2]])
to_return = tf.reduce_sum(tf.multiply(final_result,sequence),axis=1)
return to_return, most_attended
and this is the rnn with loss network:
`to_write_one_hot = tf.placeholder(dtype=tf.float32,shape=(None,line_length,dict_length))
sequence = tf.placeholder(dtype=tf.float32,shape=(None,None,3))
sequence_shift = tf.placeholder(dtype=tf.float32,shape=(None,None,3))
bias = tf.placeholder(shape=[1],dtype=tf.float32)
sequence_length = tf.placeholder(shape=(None),dtype=tf.int32)
forget_bias_placeholder = tf.placeholder(shape=(None),dtype=tf.float32)
graves_cell = Custom_Cell(forget_bias=1,one_hot_vector=to_write_one_hot,hidden_layer_nums=hidden_layer_nums,mixture_num=mixture_num,bias=bias,attention_num=attention_num)
output, state = tf.nn.dynamic_rnn(graves_cell,sequence,dtype=tf.float32,sequence_length=sequence_length)
with tf.name_scope("loss_layer"):
mask = tf.sign(tf.reduce_max(tf.abs(output), 2))
most_attended, binomial_param, pi, mu, sigma, rho = tf.split(output,[1,1,mixture_num,2*mixture_num,2*mixture_num,mixture_num], axis=2)
pi = tf.split(pi,mixture_num,axis=2)
mu = tf.split(mu,mixture_num,axis=2)
sigma = tf.split(sigma,mixture_num,axis=2)
rho = tf.split(rho,mixture_num,axis=2)
negative_log_likelihood = 0
probability = 0
x1, x2, e = tf.split(sequence_shift,3,axis=2)
for i in range(mixture_num):
pi_now = pi[i]
mu_now = tf.split(mu[i],2,axis=2)
mu_1 = mu_now[0]
mu_2 = mu_now[1]
sigma_now = tf.split(sigma[i],2,axis=2)
sigma_1 = sigma_now[0] + (1-tf.reshape(mask, [-1,max_len,1]))
sigma_2 = sigma_now[1] + (1-tf.reshape(mask, [-1,max_len,1]))
rho_now = rho[i]
Z = tf.divide(tf.square(x1-mu_1),tf.square(sigma_1)) + tf.divide(tf.square(x2-mu_2),tf.square(sigma_2)) - tf.divide(tf.multiply(tf.multiply(x1-mu_1,x2-mu_2),2*rho_now),tf.multiply(sigma_1,sigma_2))
prob = tf.exp(tf.div(tf.negative(Z),2*(1-tf.square(rho_now))))
Normalizing_factor = 2*np.pi*tf.multiply(sigma_1,sigma_2)
Normalizing_factor = tf.multiply(Normalizing_factor,tf.sqrt(1-tf.square(rho_now)))
prob = tf.divide(prob,Normalizing_factor)
prob = tf.multiply(pi_now,prob)
probability = probability + prob
binomial_likelihood = tf.multiply(binomial_param,e) + tf.multiply(1-binomial_param,1-e)
probability = tf.multiply(probability,binomial_likelihood)
probability = probability + (1-tf.reshape(mask,[-1,max_len,1]))
temp_tensor = tf.multiply(mask, tf.log(tf.reshape(probability,[-1,max_len]) + mask*0.00001))
negative_log_likelihood_0 = tf.negative(tf.reduce_sum(temp_tensor,axis=1))
negative_log_likelihood_1 = tf.divide(negative_log_likelihood_0,tf.reshape(tf.cast(sequence_length, dtype=tf.float32), shape=[-1,1]))
negative_log_likelihood_1 = tf.reduce_mean(negative_log_likelihood_1)
tf.summary.scalar("average_per_timestamp_log_likelihood", negative_log_likelihood_1)
negative_log_likelihood = tf.reduce_mean(negative_log_likelihood_0)
with tf.name_scope("train_op"):
optimizer = tf.train.RMSPropOptimizer(learning_rate=0.0001,momentum=0.9, decay=0.95,epsilon=0.0001)
gvs = optimizer.compute_gradients(negative_log_likelihood)
capped_gvs = []
for grad, var in gvs:
if var.name.__contains__("rnn"):
capped_gvs.append((tf.clip_by_value(grad,-10,10),var))
else:
capped_gvs.append((tf.clip_by_value(grad,-100,100),var))
train_op = optimizer.apply_gradients(capped_gvs)
`
Edit.1. I discovered that I was clipping gradients in a wrong way, the correct way was to introduce a new 'op' as explained by https://github.com/tensorflow/tensorflow/issues/2793 to clip only the output gradients of the whole network and lstm cells.
#tf.custom_gradient
def clip_gradient(x, clip):
def grad(dresult):
return [tf.clip_by_norm(dresult, clip)]
return x, grad
add the lines above to your code and use the function on any variable you want to clip the gradient in back propagation!
I should still see my results.
Edit 2.
The changed Model code is:
from tensorflow.contrib.rnn import RNNCell
from tensorflow.contrib.rnn import LSTMCell
from tensorflow.contrib.rnn import LSTMStateTuple
import tensorflow as tf
import numpy as np
#tf.custom_gradient
def clip_gradient_lstm(x):
def grad(dresult):
return [tf.clip_by_value(dresult,-10,10)]
return x, grad
#tf.custom_gradient
def clip_gradient_output(x):
def grad(dresult):
return [tf.clip_by_value(dresult,-100,100)]
return x, grad
def length_of(seq):
used = tf.sign(tf.reduce_max(tf.abs(seq),axis=2))
length = tf.reduce_sum(used,1)
length = tf.cast(length,tf.int32)
return length
class Custom_Cell(RNNCell):
def __init__(self,forget_bias,bias,one_hot_vector, hidden_layer_nums=[700,700,700], mixture_num=10, attention_num=4):
self.bias = bias
self.lstms = []
for i in hidden_layer_nums:
self.lstms.append(LSTMCell(num_units=i, initializer=tf.truncated_normal_initializer(0.075), dtype=tf.float32, forget_bias=forget_bias))
self.attention_num = attention_num
self.mixture_num = mixture_num
self.state_size = 2*sum(hidden_layer_nums) + 3*self.attention_num
self.attention_var_num = 3*self.attention_num
self.output_size = 6*self.mixture_num + 1 + 1
self.one_hot_vector = one_hot_vector
self.lstm_num = len(hidden_layer_nums)
self.hidden_layer_nums = hidden_layer_nums
temp_shape = self.one_hot_vector.shape
self.char_num = temp_shape[2]
self.i_to_h = []
self.w_to_h = []
self.h_to_h = []
self.prev_h_to_h = []
self.lstm_bias = []
self.lstm_to_attention_weights = tf.get_variable("lstms/first_to_attention_mtrx",shape=[hidden_layer_nums[0],self.attention_var_num],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True)
self.lstm_to_attention_bias = tf.get_variable("lstms/first_to_attention_bias",shape=[self.attention_var_num],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True)
self.all_to_output_mtrx = []
for i in range(self.lstm_num):
self.all_to_output_mtrx.append( tf.get_variable("lstms/to_output_mtrx_" + str(i), shape=[hidden_layer_nums[i],self.output_size-1],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True))
self.all_to_output_bias = tf.get_variable("lstms/output_bias",shape=[self.output_size-1],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True)
for i in range(self.lstm_num):
self.i_to_h.append(tf.get_variable("lstms/i_to_h_"+str(i),shape=[3,hidden_layer_nums[i]],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True))
self.w_to_h.append(tf.get_variable("lstms/w_to_h_"+str(i),shape=[self.char_num,hidden_layer_nums[i]],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True))
self.h_to_h.append(tf.get_variable("lstms/h_to_h_"+str(i),shape=[hidden_layer_nums[i],hidden_layer_nums[i]],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True))
self.lstm_bias.append(tf.get_variable("lstms/bias_" + str(i),shape=[hidden_layer_nums[i]],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True))
if not i == 0:
self.prev_h_to_h.append(
tf.get_variable("lstms/prev_h_to_h_" + str(i), shape=[hidden_layer_nums[i-1], hidden_layer_nums[i]],
dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075),
trainable=True))
def __call__(self, inputs, state, scope=None):
# Extracting previous configuration and vectors
splitarray = []
for i in self.hidden_layer_nums:
splitarray.append(i)
splitarray.append(i)
splitarray.append(3*self.attention_num)
splitted = tf.split(state,splitarray,axis=1)
prev_tuples = []
for i in range(self.lstm_num):
newtuple = LSTMStateTuple(splitted[2*i],splitted[2*i + 1])
prev_tuples.append(newtuple)
prev_attention_vec = splitted[2*self.lstm_num]
new_attention_vec = 0
next_states = []
most_attended = 0
last_output = 0
for i in range(self.lstm_num):
prev_c, prev_h = prev_tuples[i]
cell = self.lstms[i]
if i == 0:
with tf.name_scope("layer_1"):
w, most_attended = self.gaussian_attention(self.one_hot_vector,prev_attention_vec)
input_vec = tf.matmul(inputs,self.i_to_h[0]) + tf.matmul(prev_h,self.h_to_h[0]) + tf.matmul(w,self.w_to_h[0]) + self.lstm_bias[0]
_, new_state = cell(input_vec, prev_tuples[0])
new_c, new_h = new_state
new_h = clip_gradient_lstm(new_h)
next_states.append(new_c)
next_states.append(new_h)
last_output = tf.matmul(new_h,self.all_to_output_mtrx[0])
with tf.name_scope("attention_layer"):
temp_attention = tf.matmul(new_h,self.lstm_to_attention_weights) + self.lstm_to_attention_bias
new_alpha, new_beta, new_kappa = tf.split(temp_attention,[self.attention_num,self.attention_num,self.attention_num],axis=1)
old_alpha, old_beta, old_kappa = tf.split(prev_attention_vec,[self.attention_num,self.attention_num,self.attention_num], axis=1)
new_alpha = tf.exp(new_alpha)
new_beta = tf.exp(new_beta)
new_kappa = tf.exp(new_kappa) + old_kappa
new_attention_vec = tf.concat([new_alpha,new_beta,new_kappa],axis=1)
else:
with tf.name_scope("layer_" + str(i)):
w, most_attended = self.gaussian_attention(self.one_hot_vector,new_attention_vec)
input_vec = tf.matmul(inputs,self.i_to_h[i]) + tf.matmul(next_states[-1],self.prev_h_to_h[i-1]) + tf.matmul(prev_h,self.h_to_h[i]) + tf.matmul(w,self.w_to_h[i]) + self.lstm_bias[i]
_,new_state = cell(input_vec,prev_tuples[i])
new_c, new_h = new_state
new_h = clip_gradient_lstm(new_h)
next_states.append(new_c)
next_states.append(new_h)
last_output = last_output + tf.matmul(new_h, self.all_to_output_mtrx[i])
with tf.name_scope("output"):
last_output = last_output + self.all_to_output_bias
last_output = clip_gradient_output(last_output)
next_states.append(new_attention_vec)
state_to_return = tf.concat(next_states,axis=1)
output_split_param = [1,self.mixture_num,2*self.mixture_num,2*self.mixture_num,self.mixture_num]
binomial_param, pi, mu, sigma, rho = tf.split(last_output,output_split_param,axis=1)
binomial_param = tf.divide(1.,1.+tf.exp(binomial_param))
pi = tf.nn.softmax(tf.multiply(pi,1.+self.bias),axis=1)
mu = mu
sigma = tf.exp(sigma-self.bias)
rho = tf.tanh(rho)
output_to_return = tf.concat([most_attended, binomial_param, pi, mu, sigma, rho],axis=1)
return output_to_return, state_to_return
def state_size(self):
return self.state_size
def output_size(self):
return self.output_size
def gaussian_attention(self,sequence,params):
with tf.name_scope("attention_calculation"):
alpha, beta, kappa = tf.split(params,[self.attention_num,self.attention_num,self.attention_num],axis=1)
seq_shape = sequence.shape
seq_length = seq_shape[1]
temp_vec = np.asarray(range(seq_length),dtype=float)
final_result = 0
alpha = tf.split(alpha,self.attention_num,1)
beta = tf.split(beta,self.attention_num,1)
kappa = tf.split(kappa,self.attention_num,1)
for i in range(self.attention_num):
alpha_now = alpha[i]
beta_now = beta[i]
kappa_now = kappa[i]
result = kappa_now - temp_vec
result = tf.multiply(tf.square(result),tf.negative(beta_now))
result = tf.multiply(tf.exp(result),alpha_now)
final_result = final_result+result
most_attended = tf.argmax(final_result,axis=1)
most_attended = tf.reshape(tf.cast(most_attended,dtype=tf.float32),shape=[-1,1])
final_result = tf.tile(tf.reshape(final_result,[-1,seq_shape[1],1]),[1,1,seq_shape[2]])
to_return = tf.reduce_sum(tf.multiply(final_result,sequence),axis=1)
return to_return, most_attended
and the Training is done by
with tf.name_scope("train_op"):
optimizer =
tf.train.RMSPropOptimizer(learning_rate=0.0001,momentum=0.9, decay=0.95,epsilon=0.0001,centered=True)
train_op = optimizer.minimize(negative_log_likelihood)
and right now is still in training, but it is now as low as -10.

Scrapy returns no output - just a [

I'm trying to run the spider found in this crawler and for simplicity sake I'm using this start_url because it is just a list of 320 movies. (So, the crawler won't run for 5 hours as given in the github page).
I crawl using scrapy crawl imdb -o output.json but the output.json file contains nothing. It has just a [ in it.
import scrapy
from texteval.items import ImdbMovie, ImdbReview
import urlparse
import math
import re
class ImdbSpider(scrapy.Spider):
name = "imdb"
allowed_domains = ["imdb.com"]
start_urls = [
# "http://www.imdb.com/chart/top",
# "http://www.imdb.com/chart/bottom"
"http://www.imdb.com/search/title?countries=csxx&sort=moviemeter,asc"
]
DOWNLOADER_MIDDLEWARES = {
'scrapy.contrib.downloadermiddleware.robotstxt.ROBOTSTXT_OBEY': True,
}
base_url = "http://www.imdb.com"
def parse(self, response):
movies = response.xpath("//*[#id='main']/table/tr/td[3]/a/#href")
for i in xrange(len(movies)):
l = self.base_url + movies[i].extract()
print l
request = scrapy.Request(l, callback=self.parse_movie)
yield request
next = response.xpath("//*[#id='right']/span/a")[-1]
next_url = self.base_url + next.xpath(".//#href")[0].extract()
next_text = next.xpath(".//text()").extract()[0][:4]
if next_text == "Next":
request = scrapy.Request(next_url, callback=self.parse)
yield request
'''
for sel in response.xpath("//table[#class='chart']/tbody/tr"):
url = urlparse.urljoin(response.url, sel.xpath("td[2]/a/#href").extract()[0].strip())
request = scrapy.Request(url, callback=self.parse_movie)
yield request
'''
def parse_movie(self, response):
movie = ImdbMovie()
i1 = response.url.find('/tt') + 1
i2 = response.url.find('?')
i2 = i2 - 1 if i2 > -1 else i2
movie['id'] = response.url[i1:i2]
movie['url'] = "http://www.imdb.com/title/" + movie['id']
r_tmp = response.xpath("//div[#class='titlePageSprite star-box-giga-star']/text()")
if r_tmp is None or r_tmp == "" or len(r_tmp) < 1:
return
movie['rating'] = int(float(r_tmp.extract()[0].strip()) * 10)
movie['title'] = response.xpath("//span[#itemprop='name']/text()").extract()[0]
movie['reviews_url'] = movie['url'] + "/reviews"
# Number of reviews associated with this movie
n = response.xpath("//*[#id='titleUserReviewsTeaser']/div/div[3]/a[2]/text()")
if n is None or n == "" or len(n) < 1:
return
n = n[0].extract().replace("See all ", "").replace(" user reviews", "")\
.replace(" user review", "").replace(",", "").replace(".", "").replace("See ", "")
if n == "one":
n = 1
else:
n = int(n)
movie['number_of_reviews'] = n
r = int(math.ceil(n / 10))
for x in xrange(1, r):
start = x * 10 - 10
url = movie['reviews_url'] + "?start=" + str(start)
request = scrapy.Request(url, callback=self.parse_review)
request.meta['movieObj'] = movie
yield request
def parse_review(self, response):
ranks = response.xpath("//*[#id='tn15content']/div")[0::2]
texts = response.xpath("//*[#id='tn15content']/p")
del texts[-1]
if len(ranks) != len(texts):
return
for i in xrange(0, len(ranks) - 1):
review = ImdbReview()
review['movieObj'] = response.meta['movieObj']
review['text'] = texts[i].xpath("text()").extract()
rating = ranks[i].xpath(".//img[2]/#src").re("-?\\d+")
if rating is None or rating == "" or len(rating) < 1:
return
review['rating'] = int(rating[0])
yield review
Can someone tell me where am I going wrong?

In my opinion, this web site should be load the list of movies use by js. Fristly, I suggest you should check the output about: movies = response.xpath("//*[#id='main']/table/tr/td[3]/a/#href"). If you want to get js content, you can use webkit in scrapy as a downloader middleware.

We Keep Coding

html mysql json google-apps-script actionscript-3 ms-access google-chrome google-maps reporting-services sql-server-2008

Load completely with request in python (or other ways) - json

Related

No results for the web-scrapping from autoscout24

Why is this error occurring when I use writerow?

wanting to write a txt with Tkinter entry box

handwriting synthesis alex graves

Scrapy returns no output - just a [

Categories

Resources