123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278 |
- #! /usr/bin/perl
- use autodie;
- use strict;
- use utf8;
- use warnings qw(all);
- use feature 'say';
- use Getopt::Long;
- use Pod::Usage;
- use FileHandle;
- use Regexp::Assemble;
- use Image::ExifTool;
- use Font::TTF::Font;
- use Font::TTF::Ttc;
- =head1 NAME
- license-miner - extract copyright/licensing info from complex files
- =head1 SYNOPSIS
- license-miner [B<options>] [F<path>|inspector:F<path>...]
- =head1 OPTIONS
- =over 12
- =item B<--help>
- Print a brief help message and exits.
- =item B<--man>
- Prints the manual page and exits.
- =item B<--verbose>
- Prints names of paths and the inspector used.
- =item B<--debug>
- Prints extracted info.
- =item B<--suffix>
- Suffix appended to each input filename to form output filename.
- Default value: F<.metadata>
- =back
- =head1 DESCRIPTION
- B<This program> will inspect files,
- extract their copyright and licensing info,
- and save the result next to the files.
- File paths are provided either as arguments
- or (if no arguments provided) from STDIN.
- Each path may optionally be prefixed with an inspector to use.
- Default is to pick inspector based on file suffix.
- =head1 INSPECTORS
- Available inspectors are B<ttf> and B<exif>.
- =over 12
- =item B<ttf>
- TrueType fonts (including Truetype-flavored OpenType and WOFF).
- Used by default for extensions F<.ttf>, F<.otf>, F<woff>, F<woff2>.
- Beware that some OpenType fonts are not TrueType but Type1,
- which may fail to parse correctly based on suffix detection.
- If that happens, try force using the exif inspector
- by prefixing the path with "exif:".
- =item B<ttc>
- TrueType collections (including Truetype-flavored OpenType).
- Used by default for extension F<.ttc>.
- If parsing fails, try force using the exif inspector
- by prefixing the path with "exif:".
- =item B<exif>
- Misc. images, fonts, and other data files.
- Used by default for extensions
- F<.afm>, F<aif>, F<aifc>, F<aiff>, F<avi>, F<dcm>, F<dfont>, F<doc>,
- F<docx>, F<eps>, F<epub>, F<exe>, F<flac>, F<flv>, F<gif>, F<icc>, F<icm>,
- F<jpeg>, F<jpg>, F<mov>, F<mp3>, F<mp4>, F<mpeg>, F<mpg>, F<odp>,
- F<ods>, F<odt>, F<oga>, F<ogg>, F<ogv>, F<pdf>, F<pfa>, F<pfb>, F<pfm>,
- F<png>, F<ppt>, F<pptx>, F<ps>, F<psd>, F<ra>, F<svg>, F<swf>, F<tif>,
- F<tiff>, F<wav>, F<webm>, F<webp>, F<xcf>, F<xls>, F<xlsx>.
- Beware that some OpenType fonts are not TrueType but Type1,
- which may fail to parse correctly based on suffix detection.
- If that happens, try force using the exif inspector
- by prefixing the path with "exif:".
- =back
- =head1 ENVIRONMENT VARIABLES
- =over 12
- =item B<LICENSE_MINER_SUFFIX>
- Sets the option B<suffix>, if not provided as commandline argument.
- =back
- =cut
- # avoid custom configuration of ExifTool
- BEGIN { $Image::ExifTool::configFile = '' }
- my $suffix = $ENV{'LICENSE_MINER_SUFFIX'} || ".metadata";
- GetOptions( help => \my $help,
- man => \my $man,
- verbose => \my $verbose,
- debug => \my $debug,
- suffix => \$suffix,
- ) or pod2usage(2);
- pod2usage( -verbose => 1 ) if $help;
- pod2usage( -verbose => 2, -exitstatus => 0 ) if $man;
- # Fail if no paths provided as arguments and STDIN is interactive
- pod2usage("$0: No paths provided.") if ((@ARGV == 0) && (-t STDIN));
- my $dispatch = {
- # TrueType (including Truetype-flavored OpenType and WOFF) fonts
- '^(ttf:.*|.*\.(?:ttf|otf|woff2?))$' => sub {
- my $file = shift;
- $file =~ s/^ttf://;
- $file = check_infile($file);
- say "ttf: $file" if ($verbose);
- my $handle = ($debug)
- ? *STDOUT{IO}
- : FileHandle->new( check_outfile($file), '> :encoding(UTF-8)' );
- # source: http://scripts.sil.org/IWS-Chapter08#3054f18b
- my %table = (
- Copyright => 0,
- Trademark => 7,
- License => 13,
- 'License URL' => 14,
- );
- my $font = Font::TTF::Font->open($file) or do {
- say STDERR "ERROR: Failed to parse file as TrueType font: $_";
- exit 1;
- };
- my $fn = $font->{'name'}->read;
- foreach (sort keys %table) {
- my $value = $fn->find_name($table{$_});
- print $handle $_ . ": " . $value . "\n"
- if ($value);
- }
- },
- # TrueType (including Truetype-flavored OpenType) collections
- '^(ttc:.*|.*\.(?:ttc))$' => sub {
- my $file = shift;
- $file =~ s/^ttf://;
- $file = check_infile($file);
- say "ttf: $file" if ($verbose);
- my $handle = ($debug)
- ? *STDOUT{IO}
- : FileHandle->new( check_outfile($file), '> :encoding(UTF-8)' );
- # source: http://scripts.sil.org/IWS-Chapter08#3054f18b
- my %table = (
- Copyright => 0,
- Trademark => 7,
- License => 13,
- 'License URL' => 14,
- );
- my $collection = Font::TTF::Ttc->open($file) or do {
- say STDERR "ERROR: Failed to parse file as TrueType collection: $_";
- exit 1;
- };
- foreach ( @{$collection->{'directs'}} ) {
- my $fn = $_->{'name'}->read;
- foreach (sort keys %table) {
- my $value = $fn->find_name($table{$_});
- print $handle $_ . ": " . $value . "\n"
- if ($value);
- }
- }
- },
- # exif: misc. images and fonts
- '^(exif:.*|.*\.(?:afm|aif|aifc|aiff|avi|dcm|dfont|doc|docx|eps|epub|exe|flac|flv|gif|icc|icm|jpeg|jpg|mov|mp3|mp4|mpeg|mpg|odp|ods|odt|oga|ogg|ogv|pdf|pfa|pfb|pfm|png|ppt|pptx|ps|psd|ra|svg|swf|tif|tiff|wav|webm|webp|xcf|xls|xlsx))$' => sub {
- my $file = shift;
- $file =~ s/^exif://;
- $file = check_infile($file);
- say "exif: $file" if ($verbose);
- my $exifTool = new Image::ExifTool;
- my $handle = ($debug)
- ? *STDOUT{IO}
- : FileHandle->new( check_outfile($file), '> :encoding(UTF-8)' );
- my $info = $exifTool->ImageInfo($file, qw[
- *Agreement* *Artist* *Author* *Certificate*
- *Comment* *Company* *Contact* *Copyright*
- *Creator* *Credit* *Disclaimer* *Institution*
- *IPTCDigest* *Legal* *Licens* *Modified*
- *Notice* *Organization* *Permissions* *Perms*
- *Restrictions* *Rights* *Statement* *Terms*
- *Trademark*
- ]);
- my $seen;
- print $handle "File: $file\n";
- foreach (sort keys %$info) {
- my $tagdesc = $exifTool->GetDescription($_);
- print $handle "$tagdesc: $$info{$_}\n";
- }
- }
- };
- my $re = Regexp::Assemble->new( track => 1 )->add( keys %$dispatch );
- while( <> ) {
- chomp;
- if( $re->match($_) ) {
- $dispatch->{ $re->matched }( $re->mvar(1) );
- }
- else {
- say STDERR "ERROR: Unsupported or unparseable string: $_";
- say STDERR " maybe you need a prefix (e.g. \"exif:fonts/SomeType1Font\"";
- exit 1;
- }
- }
- sub check_infile {
- my $infile = shift;
- unless ( -e $infile ) {
- say STDERR "ERROR: file does not exist: $infile";
- exit 1;
- }
- return $infile;
- }
- sub check_outfile {
- my $infile = shift;
- my $outfile = $infile . $suffix;
- if ( -e $outfile ) {
- say STDERR "ERROR: dumpfile exist: $outfile";
- say STDERR " remove or put aside and try again";
- exit 1;
- }
- return $outfile;
- }
- =head1 AUTHOR
- Jonas Smedegaard, C<< <dr@jones.dk> >>
- =head1 LICENSE AND COPYRIGHT
- Copyright 2014-2017 Jonas Smedegaard
- This program is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by the
- Free Software Foundation; either version 3, or (at your option) any
- later version.
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
- You should have received a copy of the GNU General Public License along
- with this program. If not, see <http://www.gnu.org/licenses/>.
- =cut
|