I have a list of a million URLs. I need to extract the TLD for each URL and create a separate file for each TLD. For example, collect all URLs with .com as the TLD and dump them into one file.
Use URI's host method to get the host, Domain::PublicSuffix's get_root_domain method to parse the host name, and its tld or suffix method to get the real TLD or the pseudo-TLD.
use strict;
use warnings;
use feature qw( say );
use Domain::PublicSuffix qw( );
use URI qw( );
my $dps = Domain::PublicSuffix->new();

for (qw(
   http://www.google.com/
   http://www.google.co.uk/
)) {
   my $url = $_;

   # Treat relative URLs as absolute URLs with a missing http://.
   $url = "http://$url" if $url !~ /^\w+:/;

   my $host = URI->new($url)->host();
   $host =~ s/\.\z//;  # D::PS doesn't handle "domain.com.".

   $dps->get_root_domain($host)
      or die $dps->error();

   say $dps->tld();    # com  uk
   say $dps->suffix(); # com  co.uk
}
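
To actually split the list into per-TLD files, here is a minimal sketch of the bucketing step built on the same calls. It assumes the URLs live in a plain-text file named urls.txt (one URL per line) and that output files named after each suffix (e.g. co.uk.txt) are acceptable; both names are illustrative, not from the original post. It buckets by suffix(), i.e. the pseudo-TLD; swap in tld() if you only want the real TLD.

use strict;
use warnings;
use feature qw( say );

use Domain::PublicSuffix qw( );
use URI qw( );

my $dps = Domain::PublicSuffix->new();
my %fh_for;  # One cached output filehandle per suffix.

open(my $in, '<', 'urls.txt')
   or die "Can't open urls.txt: $!";

while (my $url = <$in>) {
   chomp($url);
   next unless length($url);

   # Treat scheme-less URLs as absolute URLs with a missing http://.
   $url = "http://$url" if $url !~ /^\w+:/;

   # Skip URLs without a host (e.g. mailto:), where host() dies.
   my $host = eval { URI->new($url)->host() }
      or next;
   $host =~ s/\.\z//;  # D::PS doesn't handle "domain.com.".

   # Skip hosts whose suffix isn't in the Public Suffix List.
   $dps->get_root_domain($host)
      or next;

   my $suffix = $dps->suffix();
   my $fh = $fh_for{$suffix} //= do {
      open(my $out, '>', "$suffix.txt")
         or die "Can't create $suffix.txt: $!";
      $out;
   };

   say $fh $url;
}

Note that this keeps one filehandle open per suffix seen; with the full Public Suffix List that can run into your process's open-file limit, so for a very diverse URL set you may need to raise the limit or close and reopen handles in append mode.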