I am trying to parse a CSV file and iterate through it with curl. The following is my data set:
Act No. 2,Sep/1900/28
Act No. 3,Sep/1900/28
Act No. 10,Oct/1900/28
I followed this Stack Overflow question, CSV into hash, to build a hash from my data set.
Here is my code:
#!/usr/bin/perl
use strict;
use warnings;
use Text::CSV_XS;
use IO::File;
use WWW::Curl::Easy;
my $url = "https://elibrary.judiciary.gov.ph/thebookshelf/docmonth/";
#my $filestoprocess = 'list_acts.csv';
# Usage example:
my $hash_ref = csv_file_hashref('toharvest_og_sourcing.csv');
foreach my $key (sort keys %{$hash_ref}){
my $urlcomplete = "$url"."#{$hash_ref->{$key}}";
#start the curl
my $user_agent = "Mozilla/5.0 (X11; Linux i686; rv:24.0) Gecko/20140319 Firefox/24.0 Iceweasel/24.4.0";
my $curl = WWW::Curl::Easy->new;
$curl->setopt(CURLOPT_HEADER,1);
$curl->setopt(CURLOPT_USERAGENT, $user_agent);
$curl->setopt(CURLOPT_FOLLOWLOCATION, 1);
#$curl->setopt(CURLOPT_SSL_VERIFYPEER, 1L);
#$curl->curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 1L);
$curl->setopt(CURLOPT_SSL_VERIFYPEER, 0);
$curl->setopt(CURLOPT_URL, $urlcomplete);
# A filehandle, reference to a scalar or reference to a typeglob can be used here.
my $response_body;
$curl->setopt(CURLOPT_WRITEDATA,\$response_body);
# Starts the actual request
my $retcode = $curl->perform;
# Looking at the results...
if ($retcode == 0) {
my $response_code = $curl->getinfo(CURLINFO_HTTP_CODE);
my $curledurldate = $response_body;
our ($issuancelink) = $curledurldate =~ /a href='(https.*?)'>.*?<STRONG>$key/s;
#print "$issuancelink\n";
if (defined $issuancelink) {
my $user_agent = "Mozilla/5.0 (X11; Linux i686; rv:24.0) Gecko/20140319 Firefox/24.0 Iceweasel/24.4.0";
#my $curl = WWW::Curl::Easy->new;
$curl->setopt(CURLOPT_HEADER,1);
$curl->setopt(CURLOPT_USERAGENT, $user_agent);
$curl->setopt(CURLOPT_FOLLOWLOCATION, 1);
#$curl->setopt(CURLOPT_SSL_VERIFYPEER, 1L);
#$curl->curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 1L);
$curl->setopt(CURLOPT_SSL_VERIFYPEER, 0);
$curl->setopt(CURLOPT_URL, $issuancelink);
# A filehandle, reference to a scalar or reference to a typeglob can be used here.
my $response_body;
$curl->setopt(CURLOPT_WRITEDATA,\$response_body);
# Starts the actual request
my $retcode = $curl->perform;
# Looking at the results...
if ($retcode == 0) {
# print("Transfer went ok\n");
my $response_code = $curl->getinfo(CURLINFO_HTTP_CODE);
my $curledsource = $response_body;
our ($ogsourcing) = $curledsource =~ /<br>\s+(\w+.*?)\s+?<CENTER>.*?H2/s;
my $filename = 'ogsourcingharvested.txt';
open (FH, '>>', $filename) or die("Could not open file. $!");
#print "Error processing ".$fh."$_\n";
print FH $ogsourcing."|"."{$key}\n";
close (FH);
}
else {
# Error code, type of error, error message
print("An error happened: $retcode ".$curl->strerror($retcode)." ".$curl->errbuf."\n");
}
} else {
# Error code, type of error, error message
print("An error happened: $retcode ".$curl->strerror($retcode)." ".$curl->errbuf."\n");
}
}
}
# Implementation:
sub csv_file_hashref {
my ($filename) = @_;
my $csv_fh = IO::File->new($filename, 'r');
my $csv = Text::CSV_XS->new ();
my %output_hash;
while(my $colref = $csv->getline ($csv_fh))
{
$output_hash{shift @{$colref}} = $colref;
}
return \%output_hash;
}
Basically, the code iterates through the second column, adds each value to the end of a URL, and that URL is fetched with curl. The content of the fetched page is then searched for a particular link:
our ($issuancelink) = $curledurldate =~ /a href='(https.*?)'>.*?<STRONG>$key/s;
When that link is found, it is stored in a variable ($issuancelink), and that URL is curled in turn. A particular piece of text in the resulting page is then matched, captured, and appended to a text file. My code works fine as long as the values in the second column (Sep/1900/28, Oct/1900/28 in this case) are not repeated. When a value is repeated, that is where I run into trouble: only the first iteration seems to be captured. In my case, Act No. 3 has the same originating URL (https://elibrary.judiciary.gov.ph/thebookshelf/docmonth/Sep/1900/28) as Act No. 2, and the link for Act No. 2 is captured instead of the one for Act No. 3.
Thanks in advance!
My code works fine as long as the values in the second column (Sep/1900/28, Oct/1900/28 in this case) are not repeated.
When you store values in a hash, the hash keys are unique. That means that if you use the same key twice, the second value will overwrite the first.
This part of your code:
while(my $colref = $csv->getline ($csv_fh))
{
$output_hash{shift @{$colref}} = $colref;
}
seems to be responsible. What you can do is save the values in an array (in this case, an array ref) instead of a scalar.
I would do something like this:
while(my $colref = $csv->getline ($csv_fh))
{
my ($key, $value) = @$colref;
push @{$output_hash{$key}}, $value; # store values in array
}
Another benefit of this is that the values are copied; in your code, only the array ref is copied. You are saved from problems here by the limited scope of your my $colref, but generally speaking, copying the values is the safer choice.
To access the array values you will probably need to loop for each hash key. Something like
for my $key (sort keys %$hash_ref) {
    for my $value (@{ $hash_ref->{$key} }) {
        # do stuff...
    }
}
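Putting the pieces together, here is a minimal sketch of how the reading sub and the outer loop could be wired up with this hash-of-arrays approach (same toharvest_og_sourcing.csv layout as in the question; the curl handling is omitted and would go where the comment is):
#!/usr/bin/perl
use strict;
use warnings;
use Text::CSV_XS;
use IO::File;
# Read "Act No. N,Mon/YYYY/DD" rows into a hash of array refs,
# so that nothing gets overwritten even when dates repeat.
sub csv_file_hashref {
    my ($filename) = @_;
    my $csv_fh = IO::File->new($filename, 'r') or die "Could not open $filename: $!";
    my $csv    = Text::CSV_XS->new();
    my %output_hash;
    while (my $colref = $csv->getline($csv_fh)) {
        my ($key, $value) = @$colref;
        push @{ $output_hash{$key} }, $value;   # store values in an array
    }
    return \%output_hash;
}
my $url      = "https://elibrary.judiciary.gov.ph/thebookshelf/docmonth/";
my $hash_ref = csv_file_hashref('toharvest_og_sourcing.csv');
for my $key (sort keys %$hash_ref) {
    for my $date (@{ $hash_ref->{$key} }) {
        my $urlcomplete = $url . $date;
        # ... curl $urlcomplete and match <STRONG>$key as in the original code ...
    }
}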
So I have to update a Perl script that goes through a JSON file, fetches keys called “items”, and transforms those items into Perl output.
I'm a noob at Perl/coding in general, so please bear with me 🥺. The offset variable is set as each URL is iterated through. A curl command is passed to the shell, the downloaded file is read into a @lines array, and in the end whatever JSON is stored in $data gets decoded and transformed. The blocks below (commented # populate %manager_to_directs, # populate %user_to_management_chain, and # populate %manager_to_followers) are where fetch_json gets called and where the hash variables get their data from the decoded JSON. (Please feel free to correct me if I have interpreted this code incorrectly.)
The problem is that $cmd doesn't account for the HTTP response code each time this program is executed. I want the results to be processed if and only if the program gets HTTP 200 (OK) or HTTP 204 (NO_CONTENT), because the program sometimes only partially refreshes our JSON endpoint (the URL in the curl command output from the terminal below), and sometimes doesn't refresh it at all.
All I'm assuming is that I'd probably have to import the HTTP::Response module and somehow pull the status code out of the commands being run in fetch_json, but I have no clue where to go from there.
Would I have to update the $cmd to pull the http code? And if so, how would I interpret that in the fetch_json sub to exit the process if anything other than 200 or 204 is received?
Oh and also, how would I save the previous output from the last execution in a backup file?
Any help I can get here would be highly appreciated!
See code below:
Pulling this from a test run:
curl -o filename -w "HTTP CODE: %{http_code}\n" --insecure --key <YOUR KEY> --cert <YOUR CERT> https://xxxxxxxxxx-xxxxxx-xxxx.xxx.xxxxxxxxxx.com:443/api/v1/reports/active/week > http.out
#!/usr/bin/env perl
use warnings;
use strict;
use JSON qw(decode_json);
use autodie qw(open close chmod unlink);
use File::Basename;
use File::Path qw(make_path rmtree);
use Cwd qw(abs_path);
use Data::Dumper;
use feature qw(state);
sub get_fetched_dir {
return "$ENV{HOME}/tmp/mule_user_fetched";
}
# fetch from mulesoft server and save local copy
sub fetch_json {
state $now = time();
my ($url) = @_;
my $dir = get_fetched_dir();
if (!-e $dir) {
make_path($dir);
chmod 0700, $dir;
}
my ($offset) = $url =~ m{offset=(\d+)};
if (!defined $offset) {
$offset = 0;
}
$offset = sprintf ("%03d", $offset);
my $filename = "$dir/offset${offset}.json";
print "$filename\n";
my @fields = stat $filename;
my $size = $fields[7];
my $mtime = $fields[9];
if (!$size || !$mtime || $now-$mtime > 24*60*60) {
my $cmd = qq(curl \\
--insecure \\
--silent \\
--key $ENV{KEY} \\
--cert $ENV{CERT} \\
$url > $filename
);
#print $cmd;
system($cmd);
chmod 0700, $filename;
}
open my $fh, "<", $filename;
my @lines = <$fh>;
close $fh;
return undef if !@lines;
my $data;
eval {
$data = decode_json (join('',@lines));
};
if ($@) {
unlink $filename;
print "Bad JSON detected in $filename.\n";
print "I have deleted $filename.\n";
print "Please re-run script.\n";
exit(1);
}
return $data;
}
die "Usage:\n KEY=key_file CERT=cert_file mule_to_jira.pl\n"
if !defined $ENV{KEY} || !defined $ENV{CERT};
print "fetching data from mulesoft\n";
# populate %manager_to_directs
my %manager_to_directs;
my %user_to_manager;
my #users;
my $url = "https://enterprise-worker-data.eip.vzbuilders.com/api/v1/reports/active/week";
while ($url && $url ne "Null") {
my $data = fetch_json($url);
last if !defined $data;
$url = $data->{next};
#print $url;
my $items = $data->{items};
foreach my $item (@$items) {
my $shortId = $item->{shortId};
my $manager = $item->{organization}{manager};
push @users, $shortId;
next if !$manager;
$user_to_manager{$shortId} = $manager;
push @{$manager_to_directs{$manager}}, $shortId;
}
}
# populate %user_to_management_chain
# populate %manager_to_followers
my %user_to_management_chain;
my %manager_to_followers;
foreach my $user (keys %user_to_manager) {
my $manager = $user_to_manager{$user};
my $prev = $user;
while ($manager && $prev ne $manager) {
push @{$manager_to_followers{$manager}}, $user;
push @{$user_to_management_chain{$user}}, $manager;
$prev = $manager;
$manager = $user_to_manager{$manager}; # manager's manager
}
}
# write backyard.txt
open my $backyard_fh, ">", "backyard.txt";
foreach my $user (sort keys %user_to_management_chain) {
my $chain = join ',', @{$user_to_management_chain{$user}};
print $backyard_fh "$user:$chain\n";
}
close $backyard_fh;
# write teams.txt
open my $team_fh, ">", "teams.txt";
foreach my $user (sort @users) {
my $followers = $manager_to_followers{$user};
my $followers_joined = $followers ? join (',', sort @$followers) : "";
print $team_fh "$user:$followers_joined\n";
}
close $team_fh;
my $dir = get_fetched_dir();
rmtree $dir, {safe => 1};
So, if you want to keep the web fetch and the Perl processing decoupled, you can modify the curl command so that it includes the response header in the output by adding the -i option. That means that the Perl will have to be modified to read and process the headers before getting to the body. A successful http.out will look something like this:
HTTP/1.1 200 OK
Server: somedomain.com
Date: <date retrieved>
Content-Type: application/json; charset=utf-8
Content-Length: <size of JSON>
Status: 200 OK
Maybe: More Headers
Blank: Line signals start of body
{
JSON object here
}
An unsuccessful curl will have something other than 200 OK on the first line next to the HTTP/1.1, so you can tell that something went wrong.
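As a minimal sketch of that approach (assuming the curl command was run with -i and its raw output was saved to $filename, as fetch_json already does): read up to the blank line as headers, check the status line, and only decode the rest as JSON if the status is acceptable.
# Sketch: split a "curl -i" capture into headers and body,
# and bail out unless the status is 200 or 204.
open my $fh, "<", $filename;
my @headers;
while (my $line = <$fh>) {
    last if $line =~ /^\r?\n$/;          # blank line ends the header block
    push @headers, $line;
}
my ($status) = ($headers[0] // '') =~ m{^HTTP/\S+\s+(\d{3})};
if (!defined $status || ($status != 200 && $status != 204)) {
    print "Unexpected HTTP status in $filename: ", ($status // 'none'), "\n";
    exit(1);
}
my $body = do { local $/; <$fh> };       # the rest of the file is the body
close $fh;
my $data = $body =~ /\S/ ? decode_json($body) : undef;   # a 204 has no body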
Alternatively, you can let the Perl do the actual HTTP fetch instead of relying on curl; you can use LWP::UserAgent or any of a number of other HTTP client libraries in Perl, which will give you the entire response, not just the body.
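For example, a rough LWP::UserAgent version of the download step in fetch_json could look like this (the ssl_opts shown are an assumed equivalent of curl's --insecure/--key/--cert flags and need LWP::Protocol::https installed):
use LWP::UserAgent;
use JSON qw(decode_json);
# Sketch: fetch $url directly from Perl and check the status code,
# instead of shelling out to curl.
my $ua = LWP::UserAgent->new(
    ssl_opts => {
        verify_hostname => 0,            # curl --insecure
        SSL_key_file    => $ENV{KEY},    # curl --key
        SSL_cert_file   => $ENV{CERT},   # curl --cert
    },
);
my $response = $ua->get($url);
my $code     = $response->code;
if ($code == 200 || $code == 204) {
    my $data = $code == 200 ? decode_json($response->decoded_content) : undef;
    # ... process $data as before ...
} else {
    die "Unexpected HTTP status $code: " . $response->status_line . "\n";
}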
I am getting a "csv file" from a vendor (using their API), but what they do is just spew the whole thing into their response. It wouldn't be a significant problem except that, of course, some of those pesky humans entered the data and put in "features" like line breaks. What I am doing now is creating a file for the raw data and then reopening it to read the data:
open RAW, ">", "$rawfile" or die "ERROR: Could not open $rawfile for write: $! \n";
print RAW $response->content;
close RAW;
my $csv = Text::CSV_XS->new({ binary=>1,always_quote=>1,eol=>$/ });
open my $fh, "<", "$rawfile" or die "ERROR: Could not open $rawfile for read: $! \n";
while ( $line = $csv->getline ($fh) ) { ...
Somehow this seems ... inelegant. It seems that I ought to be able to just read the data from $response->content (a multiline string) as if it were a file, but I'm drawing a total blank on how to do this.
A pointer would be greatly appreciated.
Thanks,
Paul
You could use a string filehandle:
my $data = $response->content;
open my $fh, "<", \$data or croak "unable to open string filehandle : $!";
my $csv = Text::CSV_XS->new({ binary=>1,always_quote=>1,eol=>$/ });
while ( $line = $csv->getline ($fh) ) { ... }
Yes, you can use Text::CSV_XS on a string, via its functional interface
use warnings;
use strict;
use feature 'say';
use Text::CSV_XS qw(csv); # must use _XS version
my $csv = qq(a,line\nand,another);
my $aoa = csv(in => \$csv)
or die Text::CSV->error_diag;
say "#$_" for #aoa;
Note that this indeed needs Text::CSV_XS (normally Text::CSV works but not with this).
I don't know why this isn't available in the OO interface (or perhaps is but is not documented).
While the above parses the string directly, as asked, you can also lessen the "inelegant" aspect of your example by writing the content directly to a file as it is acquired, which most libraries support, for instance via the :content_file option of LWP::UserAgent's get method.
Let me also note that most of the time you want the library to decode the content for you, so with LWP::UserAgent use decoded_content (see HTTP::Response).
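A small sketch of both points, assuming an LWP::UserAgent fetch of some $url (the URL and file name here are placeholders):
use LWP::UserAgent;
use Text::CSV_XS;
my $ua = LWP::UserAgent->new;
# Option 1: stream the response body straight into a file as it arrives.
$ua->get($url, ':content_file' => 'raw.csv');
# Option 2: keep it in memory, let LWP decode it, and parse it via a string filehandle.
my $response = $ua->get($url);
die $response->status_line unless $response->is_success;
my $data = $response->decoded_content;
open my $fh, '<', \$data or die "unable to open string filehandle: $!";
my $csv = Text::CSV_XS->new({ binary => 1, always_quote => 1, eol => $/ });
while (my $row = $csv->getline($fh)) {
    # ... process @$row ...
}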
I cooked up this example with Mojo::UserAgent. For the CSV input I used various data sets from the NYC Open Data. This is also going to appear in the next update for Mojo Web Clients.
I build the request without making the request right away, and that gives me the transaction object, $tx. I can then replace the read event so I can immediately send the lines into Text::CSV_XS:
#!perl
use v5.10;
use Mojo::UserAgent;
my $ua = Mojo::UserAgent->new;
my $url = ...;
my $tx = $ua->build_tx( GET => $url );
$tx->res->content->unsubscribe('read')->on(read => sub {
state $csv = do {
require Text::CSV_XS;
Text::CSV_XS->new;
};
state $buffer;
state $reader = do {
open my $r, '<:encoding(UTF-8)', \$buffer;
$r;
};
my ($content, $bytes) = @_;
$buffer .= $bytes;
while (my $row = $csv->getline($reader) ) {
say join ':', $row->@[2,4];
}
});
$tx = $ua->start($tx);
That's not as nice as I'd like it to be because all the data still show up in the buffer. This is slightly more appealing, but it's fragile in the ways I note in the comments. I'm too lazy at the moment to make it any better because that gets hairy very quickly as you figure out when you have enough data to process a record. My particular code isn't as important as the idea that you can do whatever you like as the transactor reads data and passes it into the content handler:
use v5.10;
use strict;
use warnings;
use feature qw(signatures);
no warnings qw(experimental::signatures);
use Mojo::UserAgent;
my $ua = Mojo::UserAgent->new;
my $url = ...;
my $tx = $ua->build_tx( GET => $url );
$tx->res->content
->unsubscribe('read')
->on( read => process_bytes_factory() );
$tx = $ua->start($tx);
sub process_bytes_factory {
return sub ( $content, $bytes ) {
state $csv = do {
require Text::CSV_XS;
Text::CSV_XS->new( { decode_utf8 => 1 } );
};
state $buffer = '';
state $line_no = 0;
$buffer .= $bytes;
# fragile if the entire content does not end in a
# newline (or whatever the line ending is)
my $last_line_incomplete = $buffer !~ /\n\z/;
# will not work if the format allows embedded newlines
my @lines = split /\n/, $buffer;
$buffer = pop @lines if $last_line_incomplete;
foreach my $line ( @lines ) {
my $status = $csv->parse($line);
my @row = $csv->fields;
say join ':', $line_no++, @row[2,4];
}
};
}
I used an anonymous hash to pass values from two different subroutines to a new subroutine, but now I'm not able to perform calculations using the passed variables.
use warnings;
use strict;
use feature 'say';
use DBI;
use autodie;
use Data::Dumper;
use CGI;
print "Enter sequence";
my $seq = <STDIN>;
chomp $seq;
$len = length $seq;
my $f = nuc($seq);
perc({ len => $len });
sub nuc {
my ($c) = @_;
chomp $c;
my $len = length $c;
for (my $i = 0; $i< = $len; $i++) {
my $seq2 = substr($c, $i, 1);
$nuc=$nuc . $seq2;
chomp $nuc;
}
my $l = perc({nuc => $nuc});
}
sub perc {
my $params = shift;
my $k = $params->{nuc};
my $w = $params->{len};
my $db = "hnf1a";
my $user = "root";
my $password = "";
my $host = "localhost";
my $dbh = DBI->connect("DBI:mysql:database=$db:$host",$user,$password);
my $sth = $dbh->prepare('SELECT COUNT(*) FROM mody where nm = ?');
for (1..100) {
$sth->execute(int(rand(10)));
}
chomp (my $input = $k);
my @num = split /':'/, $input;
for my $num (@num) {
say "rows matching input nuc <$num>:";
$sth->execute($num);
my $count = $sth->fetchrow_array;
say "$count";
$u += $count;
}
}
$h = $u / $w;
print $h;
I passed the variables $nuc and $len to the last subroutine, perc, using an anonymous hash.
When I use these variables to perform calculations I don't get a proper answer.
For the division above I get an 'Illegal division' error.
Please help me out. Thanks in advance.
You are making two separate calls to perc, each with only one of the required values in the hash. You can't do that: the subroutine won't "remember" a value passed to it across separate calls unless you write code to do that.
You need to collect all the values and pass them in a single call to perc, as in the sketch below.
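A minimal sketch of that change, keeping the original names (it assumes nuc() is changed to return the assembled string rather than calling perc() itself):
# Collect both values first, then make one call with a single hash ref.
my $nuc = nuc($seq);    # assumes nuc() now returns the string
my $len = length $seq;
perc({ nuc => $nuc, len => $len });
sub perc {
    my $params = shift;
    my $k = $params->{nuc};   # both keys are now present in the same call
    my $w = $params->{len};
    # ... rest of the subroutine as before ...
}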
There are rather a lot of misunderstandings here. Let's go through your code.
use CGI;
Using CGI.pm is a bit dated, but it's not a terrible idea if you're writing a CGI program. But this isn't a CGI program, so this isn't necessary.
print "Enter sequence";
my $seq = <STDIN>;
chomp $seq;
$len = length $seq;
my $f = nuc($seq);
This looks OK. You prompt the user, get some input, remove the newline from the end of the input, get the length of the input and then pass your input into nuc().
So, let's look at nuc() - which could probably have a better name!
sub nuc {
my ($c) = @_;
chomp $c;
my $len = length $c;
for (my $i = 0; $i< = $len; $i++) {
my $seq2 = substr($c, $i, 1);
$nuc=$nuc . $seq2;
chomp $nuc;
}
my $l = perc({nuc => $nuc});
}
You get the parameter that has been passed in and remove the newline from the end of it (which does nothing as this is $seq which has already had its newline removed). You then get the length of this string (again!)
Then it gets very strange. Firstly, there's a syntax error (< = should be <=). Then you use a C-style for loop together with substr() to... well, basically you just copy $c to $nuc in a really inefficient manner. So this subroutine could be written as:
sub nuc {
my ($c) = @_;
$nuc = $c;
my $l = perc({ nuc => $nuc });
}
Oh, and I don't know why you chomp($nuc) each time round the loop.
Two more strange things. Firstly, you don't declare $nuc anywhere, and you have use strict in your code. Which means that this code doesn't even compile. (Please don't waste our time with code that doesn't compile!) And secondly, you don't explicitly return a value from nuc(), but you store the return value in $f. Because of the way Perl works, this subroutine will return the value in $l. But it's best to be explicit.
Then there's your perc() subroutine.
sub perc {
my $params = shift;
my $k = $params->{nuc};
my $w = $params->{len};
my $db = "hnf1a";
my $user = "root";
my $password = "";
my $host = "localhost";
my $dbh = DBI->connect("DBI:mysql:database=$db:$host",$user,$password);
my $sth = $dbh->prepare('SELECT COUNT(*) FROM mody where nm = ?');
for (1..100) {
$sth->execute(int(rand(10)));
}
chomp (my $input = $k);
my @num = split /':'/, $input;
for my $num (@num) {
say "rows matching input nuc <$num>:";
$sth->execute($num);
my $count = $sth->fetchrow_array;
say "$count";
$u += $count;
}
}
You get the hash ref which is passed in and store it in $params. You then extract the nuc and len values from that hash and store them in variables called $k and $w (you really need to improve your variable and subroutine names!) But each call to perc only has one of those values set - so only one of your two variables gets a value; the other will be undef.
So then you connect to the database. And you run a select query a hundred times passing in random integers between 0 and 9. And ignore the value returned from the select statement. Which is bizarre and pointless.
Eventually, you start doing something with one of your input parameters, $k (the other, $w, is completely ignored). You copy it into another scalar variable before splitting it into an array. You then run the same SQL select statement once for each element in that array and add the number you get back to the running total in $u. And $u is another variable that you never declare, so (once again) this code doesn't compile.
Outside of your subroutines, you then do some simple maths with $u (an undeclared variable) and $w (a variable that was declared in a different scope) and store the result in $h (another undeclared variable).
I really don't understand what this code is supposed to do. And, to be honest, I don't think you do too. If you're at school, then you need to go back to your teacher and say that you have no idea what you are doing. If you're in a job, you need to tell your boss that you're not the right person for this task.
Either way, if you want to be a programmer, you need to go right back to the start and cover the very basics again.
I'm trying to write a program to fetch a big MySQL table, rename some fields and write it to JSON. Here is what I have for now:
use strict;
use JSON;
use DBI;
# here goes some statement preparations and db initialization
my $rowcache;
my $max_rows = 1000;
my $LIMIT_PER_FILE = 100000;
while ( my $res = shift( @$rowcache )
|| shift( @{ $rowcache = $sth->fetchall_arrayref( undef, $max_rows ) } ) ) {
if ( $cnt % $LIMIT_PER_FILE == 0 ) {
if ( $f ) {
print "CLOSE $fname\n";
close $f;
}
$filenum++;
$fname = "$BASEDIR/export-$filenum.json";
print "OPEN $fname\n";
open $f, ">$fname";
}
$res->{some_field} = $res->{another_field};
delete $res->{another_field};
print $f $json->encode( $res ) . "\n";
$cnt++;
}
I used the database row caching technique from Speeding up the DBI, and everything seems good.
The only problem I have for now is that on $res->{some_field} = $res->{another_field}, the interpreter complains that $res is "Not a HASH reference".
Please could anybody point me to my mistakes?
If you want fetchall_arrayref to return an array of hashrefs, the first parameter should be a hashref. Otherwise, an array of arrayrefs is returned, resulting in the "Not a HASH reference" error. So in order to return full rows as hashrefs, simply pass an empty hash reference:
$rowcache = $sth->fetchall_arrayref({}, $max_rows)
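With that change, the loop from the question could look roughly like this (some_field/another_field are the question's placeholder names):
# Each $res is now a hash reference keyed by column name.
while ( my $res = shift( @$rowcache )
    || shift( @{ $rowcache = $sth->fetchall_arrayref( {}, $max_rows ) } ) ) {
    $res->{some_field} = delete $res->{another_field};   # rename the field
    print $f $json->encode( $res ) . "\n";
    $cnt++;
}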
I'm trying to automate hotmail login. How can I find what the appropriate fields are? When I print the forms I just get a bunch of hex information.
What's the correct method, and how is it used?
use WWW::Mechanize;
use LWP::UserAgent;
my $mech = WWW::Mechanize->new();
my $url = "http://hotmail.com";
$mech->get($url);
print "Forms: $mech->forms";
if ($mech->success()){
print "Successful Connection\n";
} else {
print "Not a successful connection\n"; }
This may help you:
use WWW::Mechanize;
use Data::Dumper;
my $mech = WWW::Mechanize->new();
my $url = "http://yoururl.com";
$mech->get($url);
my @forms = $mech->forms;
foreach my $form (@forms) {
my @inputfields = $form->param;
print Dumper \@inputfields;
}
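Once you know the form and field names from a dump like that, you can fill them in and submit with WWW::Mechanize. A minimal sketch (the form number and the field names login/passwd are assumptions, not Hotmail's actual ones; use whatever the dump actually shows):
use strict;
use warnings;
use WWW::Mechanize;
my $mech = WWW::Mechanize->new();
$mech->get("http://hotmail.com");
# Field names below are hypothetical; replace them with the names
# printed by the form dump above.
$mech->submit_form(
    form_number => 1,
    fields      => {
        login  => 'someone@example.com',
        passwd => 'secret',
    },
);
print $mech->success() ? "Submitted\n" : "Submit failed\n";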
Sometimes it is useful to look at what the web site is asking in advance of coding up a reader or interface to it.
I wrote this bookmarklet; save it in your browser bookmarks, and when you click it while visiting any HTML web page it will show, in a pop-up, all the forms' actions and fields with their values, even hidden ones. Simply copy the text below, paste it into a new bookmark's location field, name it, and save.
javascript:t=%22<TABLE%20BORDER='1'%20BGCOLOR='#B5D1E8'>%22;for(i=0;i<document.forms.length;i++){t+=%22<TR><TH%20colspan='4'%20align='left'%20BGCOLOR='#336699'>%22;t+=%22<FONT%20color='#FFFFFF'>%20Form%20Name:%20%22;t+=document.forms[i].name;t+=%22</FONT></TH></TR>%22;t+=%22<TR><TH%20colspan='4'%20align='left'%20BGCOLOR='#99BADD'>%22;t+=%22<FONT%20color='#FFFFFF'>%20Form%20Action:%20%22;t+=document.forms[i].action;t+=%22</FONT></TH></TR>%22;t+=%22<TR><TH%20colspan='4'%20align='left'%20BGCOLOR='#99BADD'>%22;t+=%22<FONT%20color='#FFFFFF'>%20Form%20onSubmit:%20%22;t+=document.forms[i].onSubmit;t+=%22</FONT></TH></TR>%22;t+=%22<TR><TH>ID:</TH><TH>Element%20Name:</TH><TH>Type:</TH><TH>Value:</TH></TR>%22;for(j=0;j<document.forms[i].elements.length;j++){t+=%22<TR%20BGCOLOR='#FFFFFF'><TD%20align='right'>%22;t+=document.forms[i].elements[j].id;t+=%22</TD><TD%20align='right'>%22;t+=document.forms[i].elements[j].name;t+=%22</TD><TD%20align='left'>%20%22;t+=document.forms[i].elements[j].type;t+=%22</TD><TD%20align='left'>%20%22;if((document.forms[i].elements[j].type==%22select-one%22)%20||%20(document.forms[i].elements[j].type==%22select-multiple%22)){t_b=%22%22;for(k=0;k<document.forms[i].elements[j].options.length;k++){if(document.forms[i].elements[j].options[k].selected){t_b+=document.forms[i].elements[j].options[k].value;t_b%20+=%20%22%20/%20%22;t_b+=document.forms[i].elements[j].options[k].text;t_b+=%22%20%22;}}t+=t_b;}else%20if%20(document.forms[i].elements[j].type==%22checkbox%22){if(document.forms[i].elements[j].checked==true){t+=%22True%22;}else{t+=%22False%22;}}else%20if(document.forms[i].elements[j].type%20==%20%22radio%22){if(document.forms[i].elements[j].checked%20==%20true){t+=document.forms[i].elements[j].value%20+%20%22%20-%20CHECKED%22;}else{t+=document.forms[i].elements[j].value;}}else{t+=document.forms[i].elements[j].value;}t+=%22</TD></TR>%22;}}t+=%22</TABLE>%22;mA='menubar=yes,scrollbars=yes,resizable=yes,height=800,width=600,alwaysRaised=yes';nW=window.open(%22/empty.html%22,%22Display_Vars%22,%20mA);nW.document.write(t);
I tried to mimic the POST request that sends your login info, but the web site seems to dynamically add a bunch of ids (long generated strings, etc.) to the URL, and I couldn't figure out how to imitate them. So I wrote the hacky work-around below.
#!/usr/bin/perl
use strict;
use warnings;
use WWW::Curl::Easy;
use Data::Dumper;
my $curl = WWW::Curl::Easy->new;
#this is the name and complete path to the new html file we will create
my $new_html_file = 'XXXXXXXXX';
my $password = 'XXXXXXXX';
my $login = 'XXXXXXXXX';
#escape the .
$login =~ s/\./\\./g;
my $html_to_insert = qq(<script src="//ajax.googleapis.com/ajax/libs/jquery/2.0.0/jquery.min.js"></script><script type="text/javascript">setTimeout('testme()', 3400);function testme(){document.getElementById('res_box').innerHTML = '<h3 class="auto_click_login_np">Logging in...</h3>';document.f1.passwd.value = '$password';document.f1.login.value = '$login';\$("#idSIButton9").trigger("click");}var counter = 5;setInterval('countdown()', 1000);function countdown(){document.getElementById('res_box').innerHTML = '<h3 class="auto_click_login_np">You should be logged in within ' + counter + ' seconds</h3>';counter--;}</script><h2 style="background-color:#004c00; color: #fff; padding: 4px;" id="res_box" onclick="testme()" class="auto_click_login">If you are not logged in after a few seconds, click here.</h2>);
$curl->setopt(CURLOPT_HEADER,1);
my $url = 'https://login.live.com';
$curl->setopt(CURLOPT_URL, $url);
# A filehandle, reference to a scalar or reference to a typeglob can be used here.
my $response_body;
$curl->setopt(CURLOPT_WRITEDATA, \$response_body);
open( my $fresh_html_handle, '+>', 'fresh_html_from_login_page.html');
# Starts the actual request
my $curl_return_code = $curl->perform;
# Looking at the results...
if ($curl_return_code == 0) {
print("Transfer went ok\n");
my $response_code = $curl->getinfo(CURLINFO_HTTP_CODE);
# judge result and next action based on $response_code
print $fresh_html_handle $response_body;
} else {
# Error code, type of error, error message
print("An error happened: $curl_return_code ".$curl->strerror($curl_return_code)." ".$curl->errbuf."\n");
}
close($fresh_html_handle);
#erase whatever a pre-existing edited file if there is one
open my $erase_html_handle, ">", $new_html_file or die "Hork! $!\n";
print $erase_html_handle;
close $erase_html_handle;
#open the file with the login page html
open( FH, '<', 'fresh_html_from_login_page.html');
open( my $new_html_handle, '>>', $new_html_file);
my $tracker=0;
while( <FH> ){
if( $_ =~ /DOCTYPE/){
$tracker=1;
print $new_html_handle $_;
} elsif($_ =~ /<\/body><\/html>/){
#now add the javascript and html to automatically log the user in
print $new_html_handle "$html_to_insert\n$_";
}elsif( $tracker == 1){
print $new_html_handle $_;
}
}
close(FH);
close($new_html_handle);
my $sys_call_res = system("firefox file:///usr/bin/outlook_auto_login.html");
print "\n\nresult: $sys_call_res\n\n";