package MusicBrainz::Server::Filters; use strict; use warnings; use Digest::MD5 qw( md5_hex ); use Encode; use Locale::Language; use MusicBrainz::Server::Track; use MusicBrainz::Server::Validation qw( encode_entities ); use MusicBrainz::Server::Constants qw( %ENTITIES entities_with ); use Text::Trim qw( trim ); use Text::WikiFormat; use Try::Tiny; use URI::Escape; use Sub::Exporter -setup => { exports => [qw( format_editnote format_setlist format_wikitext )] }; sub release_date { my $date = shift; my ($y, $m, $d) = split /-/, $date; my $str = ""; $str .= $y if ($y && 0 + $y); $str .= "-".$m if ($m && 0 + $m); $str .= "-".$d if ($d && 0 + $d); return $str; } sub format_length { my $ms = shift; return MusicBrainz::Server::Track::FormatTrackLength($ms); } sub format_distance { my $ms = shift; return "0 s" if (!$ms); return MusicBrainz::Server::Track::FormatTrackLength($ms); } sub format_setlist { my ($text) = @_; # Encode < and > $text =~ s//>/g; # Lines starting with @ are artists $text =~ s/^@ ([^\r\n]*)/format_setlist_artist($1)/meg; # Lines starting with * are works $text =~ s/^\* ([^\r\n]*)/format_setlist_work($1)/meg; # Lines starting with # are comments $text =~ s/^# ([^\r\n]*)/$1<\/span>/mg; # Fix newlines $text =~ s/(\015\012|\012\015|\012|\015)//g; return $text; } sub format_setlist_artist { my ($line) = @_; $line =~ s/ \[ ([0-9a-f]{8} - [0-9a-f]{4} - [0-9a-f]{4} - [0-9a-f]{4} - [0-9a-f]{12})(?:\|([^\]]+))?\] /_make_link("artist",$1,$2)/eixg; $line = "Artist: $line"; return $line; } sub format_setlist_work { my ($line) = @_; $line =~ s/ \[ ([0-9a-f]{8} - [0-9a-f]{4} - [0-9a-f]{4} - [0-9a-f]{4} - [0-9a-f]{12})(?:\|([^\]]+))?\] /_make_link("work",$1,$2)/eixg; return $line; } sub format_wikitext { my ($text) = @_; return '' unless $text; $text =~ s//>/g; my $entity_names = join('|', entities_with('mbid', take => sub { my $type = shift; return shift->{url} // $type } )); # MBS-2437: Expand MBID entity links $text =~ s/ \[ ($entity_names): ([0-9a-f]{8} - [0-9a-f]{4} - [0-9a-f]{4} - [0-9a-f]{4} - [0-9a-f]{12})(?:\|([^\]]+))?\] /_make_link($1,$2,$3)/eixg; return decode( 'utf-8', Text::WikiFormat::format( encode('utf-8' => $text), {}, { prefix => "//wiki.musicbrainz.org/", extended => 1, nofollow_extended => 1, absolute_links => 1, implicit_links => 0 }) ); } sub _make_link { my ($type, $mbid, $content) = @_; $content //= "$type:$mbid"; return "$content" } sub encode_square_brackets { my $t = $_[0]; my %ent = ( '[' => '[', ']' => ']' ); $t =~ s/([\[\]])/$ent{$1}/g; $t; } sub _display_trimmed { my $url = shift; my $encoded_url = encode_square_brackets(encode_entities($url)); # shorten url's that are longer 50 characters my $display_url = length($url) > 50 ? encode_square_brackets(encode_entities(substr($url, 0, 48))) . "…" : $encoded_url; $encoded_url = "http://$encoded_url" unless $encoded_url =~ m{^(?:https?:)?//}; return qq{$display_url}; } sub normalise_url { my $url = shift; # The regular expression in format_editnote is not clever enough to handle # percent encoded parenthesis. $url =~ s/%28/$/g; $url =~ s/%29/$/g; return $url; } sub format_editnote { my ($html) = @_; my $is_url = 1; my $server = DBDefs->WEB_SERVER; # Pre-pass the edit note to attempt to normalise any URLs $html =~ s{(https?://[^\s]+)}{normalise_url($1)}eg; # Encode < and > $html =~ s//>/g; # The following taken from http://daringfireball.net/2010/07/improved_regex_for_matching_urls $html =~ s{ # Match the start of the edit note entirely, or ensure that the proceeding # character is not a : (as we don't want to match foo://bar.com as # foo:). (?:^|(?]+ # Run of non-space, non-()<> | # or $([^\s()<>]+|(\([^\s()<>]+$))*\) # balanced parens, up to 2 levels )+ (?: # End with: $([^\s()<>]+|(\([^\s()<>]+$))*\) # balanced parens, up to 2 levels | # or [^\s`!()\[\]{};:'".,<>?«»“”‘’] # not a space or one of these punct chars ) ) }{_display_trimmed($1, $2, $3, $4)}egsxi; $html =~ s[\b(?:mod(?:eration)? #?|edit[#:\h]+|edit id[#:\h]+|change[#:\h]+)(\d+)\b] [edit #$1]gi; # links to wikidocs $html =~ s/doc:(\w[\/\w]*)(``)*/$1<\/a>/gi; $html =~ s/\[(\p{IsUpper}[\/\w]*)\]/$1<\/a>/g; $html =~ s/<\/?p[^>]*>//g; $html =~ s/]*\/?>//g; $html =~ s/'''([^']+.*?)'''/$1<\/strong>/g; $html =~ s/''(.*?)''/$1<\/em>/g; $html =~ s/(\015\012|\012\015|\012|\015)//g; return $html; } =func uri_decode Attempt to decode a URL and unescape characters, assuming it's in UTF-8 encoding. If this is not the case, the function behaves as the identity function. =cut sub uri_decode { my $uri = shift; try { decode('utf-8', uri_unescape($uri), Encode::FB_CROAK); } catch { $uri; } } sub language { return code2language(shift); } sub locale { my $locale_name = shift or return ''; try { my $locale = DateTime::Locale->load($locale_name); return $locale->name } catch { return; } } sub gravatar { my $email = shift; return sprintf '//gravatar.com/avatar/%s?d=mm', md5_hex(lc(trim($email))); } sub _amazon_https { my $url = shift; $url =~ s,http://ec[x4]\.images-amazon\.com/,https://images-na.ssl-images-amazon.com/,; return $url; } sub _generic_https { my $url = shift; # list only those sites that support https $url =~ s,http://(www\.ozon\.ru|(?:[^.\/]+\.)?archive\.org)/,https://$1/,; return $url; } sub coverart_https { my $url = shift; $url = _amazon_https($url); $url = _generic_https($url); return $url; } 1;