Skip to content

Commit 363570f

Browse files
authored
Merge pull request #49 from metacpan/mickey/external
External script
2 parents 80cdeec + 66b74c0 commit 363570f

File tree

3 files changed

+297
-0
lines changed

3 files changed

+297
-0
lines changed

bin/external.pl

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
use strict;
2+
use warnings;
3+
use v5.36;
4+
5+
use Email::Sender::Simple ();
6+
use Email::Simple ();
7+
use Getopt::Long;
8+
use MetaCPAN::Logger qw< :log :dlog >;
9+
10+
use MetaCPAN::ES;
11+
use MetaCPAN::External::Cygwin qw< run_cygwin >;
12+
use MetaCPAN::External::Debian qw< run_debian >;
13+
14+
# with(
15+
# 'MetaCPAN::Script::Role::External::Cygwin',
16+
# 'MetaCPAN::Script::Role::External::Debian',
17+
# );
18+
19+
# args
20+
my ( $email_to, $external_source );
21+
GetOptions(
22+
"email_to=s" => \$email_to,
23+
"external_source=s" => \$external_source,
24+
);
25+
26+
die "wrong external source: $external\n"
27+
unless $external_source
28+
and grep { $_ eq $external_source } qw< cygwin debian >;
29+
30+
# setup
31+
my $es = MetaCPAN::ES->new( type => "author" );
32+
33+
my $ret;
34+
35+
$ret = run_cygwin() if $external_source eq 'cygwin';
36+
$ret = run_debian() if $external_source eq 'debian';
37+
38+
my $email_body = $ret->{errors_email_body};
39+
if ( $email_to and $email_body ) {
40+
my $email = Email::Simple->create(
41+
header => [
42+
'Content-Type' => 'text/plain; charset=utf-8',
43+
To => $email_to,
44+
From => '[email protected]',
45+
Subject => "Package mapping failures report for $external_source",
46+
'MIME-Version' => '1.0',
47+
],
48+
body => $email_body,
49+
);
50+
Email::Sender::Simple->send($email);
51+
52+
log_debug { "Sending email to " . $email_to . ":" };
53+
log_debug {"Email body:"};
54+
log_debug {$email_body};
55+
}
56+
57+
my $scroll = $es->scroll(
58+
type => 'distribution',
59+
scroll => '10m',
60+
body => {
61+
query => {
62+
exists => { field => "external_package." . $external_source }
63+
}
64+
},
65+
);
66+
67+
my @to_remove;
68+
69+
while ( my $s = $scroll->next ) {
70+
my $name = $s->{_source}{name};
71+
next unless $name;
72+
73+
if ( exists $dist->{$name} ) {
74+
delete $dist->{$name}
75+
if $dist->{$name} eq
76+
$s->{_source}{external_package}{$external_source};
77+
}
78+
else {
79+
push @to_remove => $name;
80+
}
81+
}
82+
83+
my $bulk = $es->bulk( type => 'distribution' );
84+
85+
for my $d ( keys %{$dist} ) {
86+
log_debug {"[$external_source] adding $d"};
87+
$bulk->update( {
88+
id => $d,
89+
doc => +{
90+
'external_package' => {
91+
$external_source => $dist->{$d}
92+
}
93+
},
94+
doc_as_upsert => 1,
95+
} );
96+
}
97+
98+
for my $d (@to_remove) {
99+
log_debug {"[$external_source] removing $d"};
100+
$bulk->update( {
101+
id => $d,
102+
doc => +{
103+
'external_package' => {
104+
$external_source => undef
105+
}
106+
}
107+
} );
108+
}
109+
110+
$bulk->flush;
111+
112+
1;
113+
114+
=pod
115+
116+
=head1 SYNOPSIS
117+
118+
# bin/external.pl --external_source SOURCE --email_to EMAIL
119+
120+
=cut

lib/MetaCPAN/External/Cygwin.pm

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
package MetaCPAN::External::Cygwin;
2+
3+
use List::Util qw< shuffle >;
4+
use MetaCPAN::Logger qw< :log :dlog >;
5+
6+
use MetaCPAN::Ingest qw< ua >;
7+
8+
use Sub::Exporter -setup => {
9+
exports => [ qw<
10+
run_cygwin
11+
> ]
12+
};
13+
14+
sub run_cygwin () {
15+
my $ret = {};
16+
17+
my $ua = ua();
18+
my $mirrors = get_mirrors($ua);
19+
20+
my @mirrors = @{ $mirrors };
21+
my $timeout = $ua->timeout(10);
22+
23+
MIRROR: {
24+
my $mirror = shift @mirrors or die "Ran out of mirrors";
25+
log_debug {"Trying mirror: $mirror"};
26+
my $res = $ua->get( $mirror . 'x86_64/setup.ini' );
27+
redo MIRROR unless $res->is_success;
28+
29+
my @packages = split /^\@ /m, $res->decoded_content;
30+
shift @packages; # drop headers
31+
32+
log_debug { sprintf "Got %d cygwin packages", scalar @packages };
33+
34+
for my $desc (@packages) {
35+
next if substr( $desc, 0, 5 ) ne 'perl-';
36+
my ( $pkg, %attr ) = map s/\A"|"\z//gr, map s/ \z//r,
37+
map s/\n+/ /gr, split /^([a-z]+): /m, $desc;
38+
$attr{category} = [ split / /, $attr{category} ];
39+
next if grep /^(Debug|_obsolete)$/, @{ $attr{category} };
40+
$ret->{dist}{ $pkg =~ s/^perl-//r } = $pkg;
41+
}
42+
}
43+
$ua->timeout($timeout);
44+
45+
log_debug {
46+
sprintf "Found %d cygwin-CPAN packages",
47+
scalar keys %{ $ret->{dist} }
48+
};
49+
50+
return $ret;
51+
}
52+
53+
sub _get_mirrors ( $ua ) {
54+
log_debug {"Fetching mirror list"};
55+
my $res = $ua->get('https://cygwin.com/mirrors.lst');
56+
die "Failed to fetch mirror list: " . $res->status_line
57+
unless $res->is_success;
58+
my @mirrors = shuffle map +( split /;/ )[0], split /\n/,
59+
$res->decoded_content;
60+
61+
log_debug { sprintf "Got %d mirrors", scalar @mirrors };
62+
return \@mirrors;
63+
}
64+
65+
1;

lib/MetaCPAN/External/Debian.pm

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
package MetaCPAN::External::Debian;
2+
3+
use strict;
4+
use warnings;
5+
use v5.36;
6+
7+
use CPAN::DistnameInfo ();
8+
use DBI ();
9+
10+
use MetaCPAN::ES;
11+
12+
use Sub::Exporter -setup => {
13+
exports => [ qw<
14+
run_debian
15+
> ]
16+
};
17+
18+
sub run_debian () {
19+
my $ret = {};
20+
21+
my $host_regex = _get_host_regex();
22+
23+
# connect to the database
24+
my $dbh = DBI->connect( "dbi:Pg:host=udd-mirror.debian.net;dbname=udd",
25+
'udd-mirror', 'udd-mirror' );
26+
27+
# special cases
28+
my %skip = ( 'libbssolv-perl' => 1 );
29+
30+
# multiple queries are needed
31+
my @sql = (
32+
33+
# packages with upstream identified as CPAN
34+
q{select u.source, u.upstream_url from upstream_metadata um join upstream u on um.source = u.source where um.key='Archive' and um.value='CPAN'},
35+
36+
# packages which upstream URL pointing to CPAN
37+
qq{select source, upstream_url from upstream where upstream_url ~ '${\$host_regex}'},
38+
);
39+
40+
my @failures;
41+
42+
for my $sql (@sql) {
43+
my $sth = $dbh->prepare($sql);
44+
$sth->execute();
45+
46+
# map Debian source package to CPAN distro
47+
while ( my ( $source, $url ) = $sth->fetchrow ) {
48+
next if $skip{$source};
49+
if ( my $dist = dist_for_debian( $source, $url ) ) {
50+
$ret->{dist}{$dist} = $source;
51+
}
52+
else {
53+
push @failures => [ $source, $url ];
54+
}
55+
}
56+
}
57+
58+
if (@failures) {
59+
my $ret->{errors_email_body} = join "\n" =>
60+
map { sprintf "%s %s", $_->[0], $_->[1] // '<undef>' } @failures;
61+
}
62+
63+
return $ret;
64+
}
65+
66+
sub dist_for_debian ( $source, $url ) {
67+
my %alias = (
68+
'datapager' => 'data-pager',
69+
'html-format' => 'html-formatter',
70+
);
71+
72+
my $dist = CPAN::DistnameInfo->new($url);
73+
if ( $dist->dist ) {
74+
return $dist->dist;
75+
}
76+
elsif ( $source =~ /^lib(.*)-perl$/ ) {
77+
my $es = MetaCPAN::ES->new( type => 'release' );
78+
my $res = $es->scroll(
79+
body => {
80+
query => {
81+
term => { 'distribution.lowercase' => $alias{$1} // $1 }
82+
},
83+
sort => [ { 'date' => 'desc' } ],
84+
}
85+
)->next;
86+
87+
return $res->{_source}{distribution}
88+
if $res;
89+
}
90+
91+
return;
92+
}
93+
94+
sub _get_host_regex () {
95+
my @cpan_hosts = qw<
96+
backpan.cpan.org
97+
backpan.perl.org
98+
cpan.metacpan.org
99+
cpan.noris.de
100+
cpan.org
101+
cpan.perl.org
102+
search.cpan.org
103+
www.cpan.org
104+
www.perl.com
105+
>;
106+
107+
return
108+
'^(https?|ftp)://('
109+
. join( '|', map {s/\./\\./r} @cpan_hosts ) . ')/';
110+
}
111+
112+
1;

0 commit comments

Comments
 (0)