
| Current Path : /bin/ |
Linux ift1.ift-informatik.de 5.4.0-216-generic #236-Ubuntu SMP Fri Apr 11 19:53:21 UTC 2025 x86_64 |
| Current File : //bin/bbl2bib |
#!/usr/bin/perl
# Debian required shebang above
# Original shebang below
# #!/usr/bin/env perl
=pod
=head1 NAME
bbl2bib.pl - convert thebibliography environment to a bib file
=head1 SYNOPSIS
bbl2bib.pl [-d] [-u] [B<-o> I<output>] I<file>
=head1 OPTIONS
=over 4
=item [-d]
Send debugging output to stdout
=item B<-o> I<output>
Output file. If this option is not used, the name for the
output file is formed by changing the extension to C<.bib>
=item B<-u>
Do not clean URL fields.
Normally C<bbl2bib> recognizes URL fields of the kind
C<http://dx.doi.org> and their variants and converts them to DOI
fields (see also L<biburl2doi(1)> script). The switch B<-u>
suppresses this cleanup.
=back
=head1 DESCRIPTION
The script tries to reconstruct a C<bib> file from the corresponding
C<thebibliography> environment. One can argue that this operation is
akin to reconstructing a cow from the steak. The way the script does
it is searching for the entry in the MR database, and
creating the corresponding BibTeX fields.
The script reads a TeX or Bbl file and extracts from it the
C<thebibliography> environment. For each bibitem it creates a plain
text bibliography entry, and then tries to match it in
the database.
=head1 INPUT FILE
We assume some structure of the input file:
=over 4
=item 1.
The bibliography is contained between the lines
\begin{thebibliography}...
and
\end{thebibliography}
=item 2.
Each bibliography item starts from the line
\bibitem[...]{....}
=back
=head1 EXAMPLES
bbl2bib -o - file.tex > result.bib
bbl2bib -o result.bib file.bbl
bbl2bib file.tex
=head1 AUTHOR
Boris Veytsman
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2014-2017 Boris Veytsman
This is free software. You may redistribute copies of it under the
terms of the GNU General Public License
L<http://www.gnu.org/licenses/gpl.html>. There is NO WARRANTY, to the
extent permitted by law.
=cut
use strict;
BEGIN {
# find files relative to our installed location within TeX Live
chomp(my $TLMaster = `kpsewhich -var-value=SELFAUTOPARENT`); # TL root
if (length($TLMaster)) {
unshift @INC, "$TLMaster/texmf-dist/scripts/bibtexperllibs";
}
}
use IO::File;
use BibTeX::Parser;
use FileHandle;
use LaTeX::ToUnicode qw (convert);
use Getopt::Std;
use URI::Escape;
use LWP::Simple;
# Sometimes AMS forgets to update certificates
$ENV{PERL_LWP_SSL_VERIFY_HOSTNAME}=0;
my $USAGE="USAGE: $0 [-d] [-u] [-o output] file\n";
my $VERSION = <<END;
bbl2bib v2.3
This is free software. You may redistribute copies of it under the
terms of the GNU General Public License
http://www.gnu.org/licenses/gpl.html. There is NO WARRANTY, to the
extent permitted by law.
$USAGE
END
our %opts;
getopts('do:huV',\%opts) or die $USAGE;
if ($opts{h} || $opts{V}){
print $VERSION;
exit 0;
}
################################################################
# Defaults and parameters
################################################################
my $inputfile = shift;
my $outputfile = $inputfile;
$outputfile =~ s/\.([^\.]*)$/.bib/;
if (exists $opts{o}) {
$outputfile = $opts{o};
}
my $debug=0;
if ($opts{d}) {
$debug=1;
}
my $cleanUrls = 1;
if ($opts{u}) {
$cleanUrls = 0;
}
my $input= IO::File->new($inputfile) or
die "Cannot find Bbl or TeX file $inputfile\n$USAGE\n";
my $output = IO::File->new("> $outputfile") or
die "Cannot write to $outputfile\n$USAGE\n";
my $userAgent = LWP::UserAgent->new;
# Bibitem is a hash with the entries 'key', 'text', 'mr',
# 'zbl'
my $bibitem;
while (<$input>) {
if (!(/\\begin\{thebibliography\}/../\\end\{thebibliography\}/) ||
/\\begin\{thebibliography\}/ || /\\end\{thebibliography\}/) {
next;
}
if (/\\bibitem\s*(\[[^\]]*\])?\{([^\}]*)\}/) {
ProcessBibitem($bibitem);
$bibitem = undef;
$bibitem->{key}=$2;
$bibitem->{text}="";
}
if (!/^\s*$/) {
$bibitem -> {text} .= $_;
}
}
ProcessBibitem($bibitem);
exit 0;
sub ProcessBibitem {
my $bibitem = shift;
my $key = $bibitem->{key};
my $text=$bibitem->{text};
if ($debug) {
print STDOUT "DEBUG: Processing item $key\n";
}
if (!length($text) || $text =~ /^\s+$/s) {
if ($debug) {
print STDOUT "DEBUG: No text found\n";
}
return;
}
my $printtext = $text;
$printtext =~ s/^(.)/% $1/mg;
print $output "$printtext";
$text =~ s/\n/ /mg;
$text =~ s/\\bibitem(\[[^\]]*\])?\{[^\}]*\}//;
# Arxiv entry?
if ($text =~ s/\\arxiv\{([^\}]+)\}\.?//) {
if ($debug) {
print STDOUT "DEBUG: Found arXiv number $1\n";
}
$bibitem->{arxiv}=$1;
}
# Mr number exists?
if ($text =~ s/\\mr\{([^\}]+)\}\.?//) {
if ($debug) {
print STDOUT "DEBUG: Found mr number $1\n";
}
$bibitem->{mr}=$1;
}
# zbl number exists?
if ($text =~ s/\\zbl\{([^\}]+)\}\.?//) {
if ($debug) {
print STDOUT "DEBUG: Found zbl number $1\n";
}
$bibitem->{zbl}=$1;
}
# doi number exists?
if ($text =~ s/\\doi\{([^\}]+)\}\.?//) {
if ($debug) {
print STDOUT "DEBUG: Found doi $1\n";
}
$bibitem->{doi}=$1;
}
$bibitem->{bib} = SearchMref($bibitem);
if ($cleanUrls) {
$bibitem->{bib} = CleanUrl ($bibitem->{bib});
}
PrintBibitem($bibitem);
return;
}
sub SearchMref {
my $bibitem = shift;
my $mirror = "http://www.ams.org/mathscinet-mref";
my $string=uri_escape_utf8($bibitem->{text});
if ($debug) {
print STDOUT "Sending $mirror?ref=$string".'&'."dataType=bibtex\n"
}
my $response = $userAgent->get("$mirror?ref=$string&dataType=bibtex") ->
decoded_content();
if ($debug) {
print STDOUT "DEBUG: Response $response\n";
}
if ($response =~ /<pre>(.*)<\/pre>/s) {
my $bib= $1;
# Looks like Mathscinet sometimes fails to unaccent text.
# For the time being we just delete the offending characters.
# Should probably write LaTeX::FromUnicode instead
$bib =~ s/[^\x00-\x7f]//g;
if ($debug) {
print STDOUT "DEBUG: got $bib\n";
}
my $fh = new FileHandle;
open $fh, "<", \$bib;
my $parser = new BibTeX::Parser($fh);
my $entry = $parser->next;
if (ref($entry) && $entry->parse_ok()) {
$entry->key($bibitem->{key});
return ($entry);
} else {
if ($debug) {
if (!ref($entry)) {
print STDERR "DEBUG: not a reference\n";
} else{
print STDERR "DEBUG: parsing $entry->parse_ok\n";
}
}
}
} else {
if ($debug) {
print STDOUT "DEBUG: did not get an entry\n";
}
}
}
sub CleanUrl {
my $entry = shift;
if (!ref($entry)) {
return $entry;
}
if ($entry->has('doi')) {
return $entry;
}
if (!$entry->has('url')) {
return $entry;
}
if ($entry->field('url') =~ m|^http(?:s)?://(?:dx\.)?doi\.org/(.*)$|) {
$entry->field('doi', $1);
delete $entry->{'url'};
}
return $entry;
}
sub PrintBibitem {
print $output "\n";
my $bibitem = shift;
if (!ref($bibitem->{bib})) {
return;
}
my $entry=$bibitem->{bib};
if ($bibitem->{mr} && ! $entry->field('mrnumber')) {
$entry->field('mrnumber', $bibitem->{mr});
}
if ($entry->field('mrnumber')) {
my $mr=$entry->field('mrnumber');
while (length($mr)<7) {
$mr = "0$mr";
}
$mr=$entry->field('mrnumber', $mr);
}
if ($bibitem->{zbl} && ! $entry->field('zblnumber')) {
$entry->field('zblnumber', $bibitem->{zbl});
}
if ($bibitem->{doi} && ! $entry->field('doi')) {
$entry->field('doi', $bibitem->{doi});
}
if ($bibitem->{arxiv} && ! $entry->field('arxiv')) {
$entry->field('arxiv', $bibitem->{arxiv});
}
print $output $entry->to_string(), "\n\n";
}