
| Current Path : /bin/ |
Linux ift1.ift-informatik.de 5.4.0-216-generic #236-Ubuntu SMP Fri Apr 11 19:53:21 UTC 2025 x86_64 |
| Current File : //bin/bibdoiadd |
#!/usr/bin/perl
# Debian required shebang above
# Original shebang below
# #!/usr/bin/env perl
=pod
=head1 NAME
bibdoiadd.pl - add DOI numbers to papers in a given bib file
=head1 SYNOPSIS
bibdoiadd [B<-c> I<config_file>] [B<-C> 1|0] [B<-e> 1|0] [B<-f>] [B<-o> I<output>] I<bib_file>
=head1 OPTIONS
=over 4
=item B<-c> I<config_file>
Configuration file. If this file is absent, some defaults are used.
See below for its format.
=item B<-C> 1|0
Whether to canonize names in the output (1) or not (0). By default, 1.
=item B<-e> 1|0
If 1 (default), add empty doi if a doi cannot be found. This prevents
repeated searches for the same entries if you add new entries to the
file. Calling C<-e 0> suppresses this behavior.
=item B<-f>
Force checking doi number even if one is present
=item B<-o> I<output>
Output file. If this option is not used, the name for the
output file is formed by adding C<_doi> to the input file name.
=back
=head1 DESCRIPTION
The script reads a BibTeX file. It checks whether the entries have
DOIs. If not, it tries to contact http://www.crossref.org to get the
corresponding DOI. The result is a BibTeX file with the fields
C<doi=...> added.
The name of the output file is either set by the B<-o> option or
is derived by adding the suffix C<_doi> to the name of the input file.
There are two options for making queries with Crossref: free account
and paid membership. In the first case you still must register with
Crossref and are limited to a small number of queries, see the
agreement at
C<http://www.crossref.org/01company/free_services_agreement.html>. In
the second case you have a username and password, and can use them for
automatic queries. I am not sure whether the use of this script is
allowed for the free account holders. Anyway if you try to add DOI
to a large number of entries, you should register as a paid member.
=head1 CONFIGURATION FILE
The configuration file is mostly self-explanatory: it has comments
(starting with C<#>) and assignments in the form
$field = value ;
The important parameters are C<$mode> (C<'free'> or C<'paid'>),
C<$email> (for free users) and C<$username> & C<$password> for paid
members.
=head1 EXAMPLES
bibdoiadd -c bibdoiadd.cfg -o - citations.bib > result.bib
bibdoiadd -c bibdoiadd.cfg -o result.bib citations.bib
=head1 AUTHOR
Boris Veytsman
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2014-2017 Boris Veytsman
This is free software. You may redistribute copies of it under the
terms of the GNU General Public License
L<http://www.gnu.org/licenses/gpl.html>. There is NO WARRANTY, to the
extent permitted by law.
=cut
use strict;
BEGIN {
    # Ask kpsewhich for the value of SELFAUTOPARENT (the TeX Live root) and,
    # when it reports one, put the bundled bibtexperllibs directory at the
    # front of the module search path.
    my $tl_root = `kpsewhich -var-value=SELFAUTOPARENT`;
    chomp $tl_root;
    unshift @INC, "$tl_root/texmf-dist/scripts/bibtexperllibs"
        if length $tl_root;
}
use IO::File;
use BibTeX::Parser;
use LaTeX::ToUnicode qw (convert);
use Getopt::Std;
use URI::Escape;
use LWP::Simple;
# Hostname verification is switched off for LWP because AMS sometimes
# forgets to update its certificates.
$ENV{PERL_LWP_SSL_VERIFY_HOSTNAME} = 0;

# One-line usage summary; appended to most fatal error messages.
my $USAGE = "USAGE: $0 "
    . '[-c config] [-C 1|0] [-e 1|0] [-f] [-o output] file'
    . "\n";

# Version and license banner shown for -h and -V (it embeds the usage line).
my $VERSION = <<"END_OF_BANNER";
bibdoiadd v2.2
This is free software. You may redistribute copies of it under the
terms of the GNU General Public License
http://www.gnu.org/licenses/gpl.html. There is NO WARRANTY, to the
extent permitted by law.
$USAGE
END_OF_BANNER

# Command-line switches: -f, -h and -V are flags; -e, -c, -C and -o take
# an argument.
our %opts;
getopts('fe:c:C:o:hV', \%opts)
    or die $USAGE;

if ($opts{h} or $opts{V}) {
    print $VERSION;
    exit 0;
}
################################################################
# Defaults and parameters
################################################################
# The only positional argument is the BibTeX file to process.
my $inputfile = shift;
if (!defined($inputfile) || !length($inputfile)) {
    die "No BibTeX file given\n$USAGE";
}

# Default output name: insert "_doi" before the extension of the last path
# component.  If there is no extension, append "_doi" instead, so that the
# derived name can never be the input file itself.
my $outputfile = $inputfile;
if ($outputfile !~ s/\.([^.\/]*)$/_doi.$1/) {
    $outputfile .= '_doi';
}
if (exists $opts{o}) {
    $outputfile = $opts{o};
}
# Opening the output truncates it, so writing onto the file that is being
# read would destroy the input.
if ($outputfile eq $inputfile) {
    die "Output file must differ from the input file $inputfile\n$USAGE";
}

# -f: look up a DOI even for entries that already have one.
my $forceSearch = $opts{f};

# -e: whether to add an empty doi field when nothing is found (default 1).
my $forceEmpty = 1;
if (exists $opts{e}) {
    $forceEmpty = $opts{e};
}

# -C: whether names are canonized in rewritten entries (default 1).
my $canonizeNames = 1;
if (exists $opts{C}) {
    $canonizeNames = $opts{C};
}

# Settings that the configuration file may assign.  They are package
# variables ("our") so that the file loaded by require below can set them.
our $mode='free';
our $email;
our $username;
our $password;

# The configuration file is loaded as Perl code.  "." is added to @INC so
# that a path relative to the current directory can be found by require.
if ($opts{c}) {
    if (-r $opts{c}) {
        push @INC, ".";
        require $opts{c};
    } else {
        die "Cannot read options $opts{c}. $USAGE";
    }
}

# Check the consistency
if ($mode eq 'free' && !length($email)) {
    die "Crossref requires a registered e-mail for the free mode queries\n";
}
if ($mode eq 'paid' && (!length($username) || !length($password))) {
    die
        "Crossref requires a username and password for the paid mode queries\n";
}

my $input = IO::File->new($inputfile) or
    die "Cannot find BibTeX file $inputfile\n$USAGE\n";
# The single-argument "> name" form is kept on purpose: it is what makes
# "-o -" (shown in the EXAMPLES section) write to standard output.
my $output = IO::File->new("> $outputfile") or
    die "Cannot write to $outputfile\n$USAGE\n";

my $parser = BibTeX::Parser->new($input);

# Common start of every Crossref OpenURL query; the pid parameter carries
# the e-mail (free mode) or username:password (otherwise).
my $prefix =
    "http://www.crossref.org/openurl?redirect=false";
if ($mode eq 'free') {
    $prefix .= '&pid='.uri_escape($email);
} else {
    $prefix .= '&pid='.uri_escape($username).":".
        uri_escape($password);
}
# Processing the input
#
# Every entry is read from the parser and written to the output.  Entries
# that are not ARTICLE, BOOK or INCOLLECTION, and entries that already have
# a doi field (unless -f was given), are copied verbatim.  For the others a
# DOI is looked up and the entry is re-serialized.
my %searchableType = map { $_ => 1 } qw(ARTICLE BOOK INCOLLECTION);

while (my $entry = $parser->next) {
    if (!$entry->parse_ok()) {
        # Report the offending text through raw_bibtex(), the accessor this
        # script already uses for pass-through entries.
        my $raw = $entry->raw_bibtex();
        $raw = '' unless defined $raw;
        print STDERR "Cannot understand entry: ";
        print STDERR $raw, "\n";
        print STDERR "Skipping this entry\n";
        next;
    }

    my $wanted = $searchableType{$entry->type()};
    if (!$wanted || ($entry->has('doi') && !$forceSearch)) {
        print $output $entry->raw_bibtex(), "\n\n";
        next;
    }

    my $doi = GetDoi($prefix, $entry);
    if (length($doi)) {
        $entry->field('doi', $doi);
    } elsif ($forceEmpty && !$entry->has('doi')) {
        # Add the empty placeholder only when the entry has no doi field:
        # with -f a failed lookup must not wipe out an existing DOI.
        $entry->field('doi', '');
    }
    print $output
        $entry->to_string(canonize_names=>$canonizeNames),
        "\n\n";
}

$input->close();
# Buffered write errors (e.g. a full disk) only show up when closing.
$output->close() or die "Cannot finish writing $outputfile: $!\n";
exit 0;
###############################################################
# Getting one doi
###############################################################
# GetDoi($url, $entry)
#
#   $url   - start of the query URL, to which the search parameters
#            are appended
#   $entry - the parsed BibTeX entry to look up
#
# Builds the query from the issn, journal, first author's last name,
# volume, number, first page and year of the entry (each only when
# present), fetches it and returns the content of the first <doi ...>
# element of the response.  Returns the empty string when the request
# fails or the response contains no such element.
sub GetDoi {
    my ($url,$entry) = @_;

    if ($entry->has('issn')) {
        $url .= "&issn=".uri_escape_utf8(SanitizeText($entry->field('issn')));
    }
    if ($entry->has('journal')) {
        $url .= "&title=".uri_escape_utf8(SanitizeText($entry->field('journal')));
    }
    my @names=$entry->author();
    if (scalar(@names)) {
        my $lastname = SanitizeText($names[0]->last());
        $url .= "&aulast=".uri_escape_utf8($lastname);
    }
    if ($entry->has('volume')) {
        $url .= "&volume=".uri_escape_utf8($entry->field('volume'));
    }
    if ($entry->has('number')) {
        $url .= "&issue=".uri_escape_utf8($entry->field('number'));
    }
    if ($entry->has('pages')) {
        # Only the first page is sent: drop everything from the first dash.
        my $pages=$entry->field('pages');
        $pages =~ s/-.*$//;
        $url .= "&spage=".uri_escape_utf8($pages);
    }
    if ($entry->has('year')) {
        $url .= "&date=".uri_escape_utf8($entry->field('year'));
    }

    # LWP::Simple::get returns undef when the request fails.  The URL is
    # deliberately not printed: it contains the account credentials.
    my $result=get($url);
    if (!defined $result) {
        print STDERR "Cannot get a response from Crossref\n";
        return "";
    }

    # Non-greedy capture: stop at the first closing tag even when several
    # <doi> elements share one line.
    if ($result =~ m/<doi [^>]*>(.*?)<\/doi>/) {
        my $doi = $1;
        # The response is XML, so characters such as "<" and "&" inside a
        # DOI arrive as entities.  "&amp;" is decoded last so that its
        # result is not decoded a second time.
        $doi =~ s/&lt;/</g;
        $doi =~ s/&gt;/>/g;
        $doi =~ s/&quot;/"/g;
        $doi =~ s/&apos;/'/g;
        $doi =~ s/&amp;/&/g;
        return $doi;
    }
    return "";
}
###############################################################
# Sanitization of a text string
###############################################################
# SanitizeText($string)
#
# Passes the string through convert() and then strips or replaces a
# fixed set of TeX markup, returning the cleaned text.  The steps run
# strictly one after another, in the order written below.
sub SanitizeText {
    my ($text) = @_;
    my $clean = convert($text);

    # Control sequences that are simply deleted, one pass per macro.
    foreach my $macro (qw(newblock bgroup egroup scshape
                          urlprefix emph textbf enquote)) {
        $clean =~ s/\\$macro//g;
    }

    # Control sequences that are replaced by plain text.
    $clean =~ s/\\url/URL: /g;
    $clean =~ s/\\doi/DOI: /g;
    $clean =~ s/\\\\/ /g;

    # Dollar signs are dropped before \checkcomma is turned into a comma.
    $clean =~ tr/$//d;
    $clean =~ s/\\checkcomma/,/g;

    # Ties become spaces; braces are removed.
    $clean =~ tr/~/ /;
    $clean =~ tr/{}//d;

    return $clean;
}