license-miner 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278
  1. #! /usr/bin/perl
  2. use autodie;
  3. use strict;
  4. use utf8;
  5. use warnings qw(all);
  6. use feature 'say';
  7. use Getopt::Long;
  8. use Pod::Usage;
  9. use FileHandle;
  10. use Regexp::Assemble;
  11. use Image::ExifTool;
  12. use Font::TTF::Font;
  13. use Font::TTF::Ttc;
  14. =head1 NAME
  15. license-miner - extract copyright/licensing info from complex files
  16. =head1 SYNOPSIS
  17. license-miner [B<options>] [F<path>|inspector:F<path>...]
  18. =head1 OPTIONS
  19. =over 12
  20. =item B<--help>
  21. Print a brief help message and exits.
  22. =item B<--man>
  23. Prints the manual page and exits.
  24. =item B<--verbose>
  25. Prints names of paths and the inspector used.
  26. =item B<--debug>
  27. Prints extracted info.
  28. =item B<--suffix>
  29. Suffix appended to each input filename to form output filename.
  30. Default value: F<.metadata>
  31. =back
  32. =head1 DESCRIPTION
  33. B<This program> will inspect files,
  34. extract their copyright and licensing info,
  35. and save the result next to the files.
  36. File paths are provided either as arguments
  37. or (if no arguments provided) from STDIN.
  38. Each path may optionally be prefixed with an inspector to use.
  39. Default is to pick inspector based on file suffix.
  40. =head1 INSPECTORS
  41. Available inspectors are B<ttf> and B<exif>.
  42. =over 12
  43. =item B<ttf>
  44. TrueType fonts (including Truetype-flavored OpenType and WOFF).
  45. Used by default for extensions F<.ttf>, F<.otf>, F<woff>, F<woff2>.
  46. Beware that some OpenType fonts are not TrueType but Type1,
  47. which may fail to parse correctly based on suffix detection.
  48. If that happens, try force using the exif inspector
  49. by prefixing the path with "exif:".
  50. =item B<ttc>
  51. TrueType collections (including Truetype-flavored OpenType).
  52. Used by default for extension F<.ttc>.
  53. If parsing fails, try force using the exif inspector
  54. by prefixing the path with "exif:".
  55. =item B<exif>
  56. Misc. images, fonts, and other data files.
  57. Used by default for extensions
  58. F<.afm>, F<aif>, F<aifc>, F<aiff>, F<avi>, F<dcm>, F<dfont>, F<doc>,
  59. F<docx>, F<eps>, F<epub>, F<exe>, F<flac>, F<flv>, F<gif>, F<icc>, F<icm>,
  60. F<jpeg>, F<jpg>, F<mov>, F<mp3>, F<mp4>, F<mpeg>, F<mpg>, F<odp>,
  61. F<ods>, F<odt>, F<oga>, F<ogg>, F<ogv>, F<pdf>, F<pfa>, F<pfb>, F<pfm>,
  62. F<png>, F<ppt>, F<pptx>, F<ps>, F<psd>, F<ra>, F<svg>, F<swf>, F<tif>,
  63. F<tiff>, F<wav>, F<webm>, F<webp>, F<xcf>, F<xls>, F<xlsx>.
  64. Beware that some OpenType fonts are not TrueType but Type1,
  65. which may fail to parse correctly based on suffix detection.
  66. If that happens, try force using the exif inspector
  67. by prefixing the path with "exif:".
  68. =back
  69. =head1 ENVIRONMENT VARIABLES
  70. =over 12
  71. =item B<LICENSE_MINER_SUFFIX>
  72. Sets the option B<suffix>, if not provided as commandline argument.
  73. =back
  74. =cut
  75. # avoid custom configuration of ExifTool
  76. BEGIN { $Image::ExifTool::configFile = '' }
  77. my $suffix = $ENV{'LICENSE_MINER_SUFFIX'} || ".metadata";
  78. GetOptions( help => \my $help,
  79. man => \my $man,
  80. verbose => \my $verbose,
  81. debug => \my $debug,
  82. suffix => \$suffix,
  83. ) or pod2usage(2);
  84. pod2usage( -verbose => 1 ) if $help;
  85. pod2usage( -verbose => 2, -exitstatus => 0 ) if $man;
  86. # Fail if no paths provided as arguments and STDIN is interactive
  87. pod2usage("$0: No paths provided.") if ((@ARGV == 0) && (-t STDIN));
  88. my $dispatch = {
  89. # TrueType (including Truetype-flavored OpenType and WOFF) fonts
  90. '^(ttf:.*|.*\.(?:ttf|otf|woff2?))$' => sub {
  91. my $file = shift;
  92. $file =~ s/^ttf://;
  93. $file = check_infile($file);
  94. say "ttf: $file" if ($verbose);
  95. my $handle = ($debug)
  96. ? *STDOUT{IO}
  97. : FileHandle->new( check_outfile($file), '> :encoding(UTF-8)' );
  98. # source: http://scripts.sil.org/IWS-Chapter08#3054f18b
  99. my %table = (
  100. Copyright => 0,
  101. Trademark => 7,
  102. License => 13,
  103. 'License URL' => 14,
  104. );
  105. my $font = Font::TTF::Font->open($file) or do {
  106. say STDERR "ERROR: Failed to parse file as TrueType font: $_";
  107. exit 1;
  108. };
  109. my $fn = $font->{'name'}->read;
  110. foreach (sort keys %table) {
  111. my $value = $fn->find_name($table{$_});
  112. print $handle $_ . ": " . $value . "\n"
  113. if ($value);
  114. }
  115. },
  116. # TrueType (including Truetype-flavored OpenType) collections
  117. '^(ttc:.*|.*\.(?:ttc))$' => sub {
  118. my $file = shift;
  119. $file =~ s/^ttf://;
  120. $file = check_infile($file);
  121. say "ttf: $file" if ($verbose);
  122. my $handle = ($debug)
  123. ? *STDOUT{IO}
  124. : FileHandle->new( check_outfile($file), '> :encoding(UTF-8)' );
  125. # source: http://scripts.sil.org/IWS-Chapter08#3054f18b
  126. my %table = (
  127. Copyright => 0,
  128. Trademark => 7,
  129. License => 13,
  130. 'License URL' => 14,
  131. );
  132. my $collection = Font::TTF::Ttc->open($file) or do {
  133. say STDERR "ERROR: Failed to parse file as TrueType collection: $_";
  134. exit 1;
  135. };
  136. foreach ( @{$collection->{'directs'}} ) {
  137. my $fn = $_->{'name'}->read;
  138. foreach (sort keys %table) {
  139. my $value = $fn->find_name($table{$_});
  140. print $handle $_ . ": " . $value . "\n"
  141. if ($value);
  142. }
  143. }
  144. },
  145. # exif: misc. images and fonts
  146. '^(exif:.*|.*\.(?:afm|aif|aifc|aiff|avi|dcm|dfont|doc|docx|eps|epub|exe|flac|flv|gif|icc|icm|jpeg|jpg|mov|mp3|mp4|mpeg|mpg|odp|ods|odt|oga|ogg|ogv|pdf|pfa|pfb|pfm|png|ppt|pptx|ps|psd|ra|svg|swf|tif|tiff|wav|webm|webp|xcf|xls|xlsx))$' => sub {
  147. my $file = shift;
  148. $file =~ s/^exif://;
  149. $file = check_infile($file);
  150. say "exif: $file" if ($verbose);
  151. my $exifTool = new Image::ExifTool;
  152. my $handle = ($debug)
  153. ? *STDOUT{IO}
  154. : FileHandle->new( check_outfile($file), '> :encoding(UTF-8)' );
  155. my $info = $exifTool->ImageInfo($file, qw[
  156. *Agreement* *Artist* *Author* *Certificate*
  157. *Comment* *Company* *Contact* *Copyright*
  158. *Creator* *Credit* *Disclaimer* *Institution*
  159. *IPTCDigest* *Legal* *Licens* *Modified*
  160. *Notice* *Organization* *Permissions* *Perms*
  161. *Restrictions* *Rights* *Statement* *Terms*
  162. *Trademark*
  163. ]);
  164. my $seen;
  165. print $handle "File: $file\n";
  166. foreach (sort keys %$info) {
  167. my $tagdesc = $exifTool->GetDescription($_);
  168. print $handle "$tagdesc: $$info{$_}\n";
  169. }
  170. }
  171. };
  172. my $re = Regexp::Assemble->new( track => 1 )->add( keys %$dispatch );
  173. while( <> ) {
  174. chomp;
  175. if( $re->match($_) ) {
  176. $dispatch->{ $re->matched }( $re->mvar(1) );
  177. }
  178. else {
  179. say STDERR "ERROR: Unsupported or unparseable string: $_";
  180. say STDERR " maybe you need a prefix (e.g. \"exif:fonts/SomeType1Font\"";
  181. exit 1;
  182. }
  183. }
  184. sub check_infile {
  185. my $infile = shift;
  186. unless ( -e $infile ) {
  187. say STDERR "ERROR: file does not exist: $infile";
  188. exit 1;
  189. }
  190. return $infile;
  191. }
  192. sub check_outfile {
  193. my $infile = shift;
  194. my $outfile = $infile . $suffix;
  195. if ( -e $outfile ) {
  196. say STDERR "ERROR: dumpfile exist: $outfile";
  197. say STDERR " remove or put aside and try again";
  198. exit 1;
  199. }
  200. return $outfile;
  201. }
  202. =head1 AUTHOR
  203. Jonas Smedegaard, C<< <dr@jones.dk> >>
  204. =head1 LICENSE AND COPYRIGHT
  205. Copyright 2014-2017 Jonas Smedegaard
  206. This program is free software; you can redistribute it and/or modify it
  207. under the terms of the GNU General Public License as published by the
  208. Free Software Foundation; either version 3, or (at your option) any
  209. later version.
  210. This program is distributed in the hope that it will be useful, but
  211. WITHOUT ANY WARRANTY; without even the implied warranty of
  212. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  213. General Public License for more details.
  214. You should have received a copy of the GNU General Public License along
  215. with this program. If not, see <http://www.gnu.org/licenses/>.
  216. =cut