#!/usr/bin/perl
#
# A consistency checker for BibTeX files.
# Copyright (c) 2005, Hiroyuki Ohsaki.
# All rights reserved.
#
# $Id: bibcheck,v 1.10 2010/03/29 11:14:31 oosaki Exp $
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

use strict;
use warnings;

no diagnostics;
require 'jcode.pl';
use English qw(-no_match_vars);
use File::Basename;
use Getopt::Std;
use Text::BibTeX::Bib;
use Text::BibTeX;

# Replace Text::BibTeX::Entry::warn so that every diagnostic is printed
# on standard output as "<file>:<line>: **** <message>".  The
# replacement is installed at run time, after Text::BibTeX has been
# loaded, so it takes precedence over the module's own method.
{
    no warnings qw(redefine once);
    *Text::BibTeX::Entry::warn = sub {
        my ( $self, $warning, $field ) = @_;

        my $location = '';
        if ( $self->{'file'} ) {
            $location = $self->{'file'}{'filename'} . ":";
        }

        my $lines       = $self->{'lines'};
        my $entry_range = "$lines->{'START'}";
        if ( defined $field ) {
            $location .=
              ( exists $lines->{$field} )
              ? "line $lines->{$field}: "
              : "$entry_range (unknown field \"$field\"): ";
        }
        else {
            $location .= "$entry_range: ";
        }
        print "$location**** $warning\n";
    };
}

# Entry type => checker called as checker($entry, \%fields_found).
my %CHECK_TYPE_TBL = (
    'article'       => \&check_type_article,
    'inproceedings' => \&check_type_inproceedings,
    'misc'          => \&nop,
    'unpublished'   => \&nop,
    'mastersthesis' => \&check_type_mastersthesis,
    'book'          => \&check_type_book,
);

# Field name => checker called as checker($entry).  A field that is not
# listed here is reported as unknown/unsupported by check_file().
my %CHECK_FIELD_TBL = (
    'abstract'     => \&nop,
    'eabstract'    => \&nop,
    'author'       => \&check_field_author,
    'booktitle'    => \&check_field_booktitle,
    'journal'      => \&nop,
    'month'        => \&check_field_month,
    'number'       => \&nop,
    'pages'        => \&check_field_pages,
    'title'        => \&check_field_title,
    'volume'       => \&nop,
    'year'         => \&nop,
    'note'         => \&check_field_note,
    'editor'       => \&nop,
    'publisher'    => \&nop,
    'school'       => \&nop,
    'howpublished' => \&check_field_howpublished,
);

# Print a usage message and terminate.
#
# NOTE(review): the original message text (and whether the sub exits,
# and with which status) was lost where the source was mangled
# ("print <get('journal');").  The text below is a reconstruction;
# confirm it against the upstream bibcheck 1.10 script.
sub usage {
    my $prog = basename($0);
    print <<EOF;
usage: $prog [-v] file...
EOF
    exit 1;
}

# Checker for fields and entry types that need no validation.
#
# NOTE(review): referenced by both dispatch tables, but its definition
# was lost in the mangled region of the source; presumably it was an
# empty sub -- confirm.
sub nop {
    return;
}

# Warn about every field of @fields that is not marked in %$found.
#   $ent    - entry the warnings are attached to
#   $found  - hash ref whose true values mark the fields that are present
#   @fields - names of the mandatory fields
sub _require_fields {
    my ( $ent, $found, @fields ) = @_;
    for my $field (@fields) {
        $ent->warn("missing mandatory field `$field'")
          unless $found->{$field};
    }
    return;
}

# Check the mandatory fields of an @article entry.  Some fields are not
# required while the journal field marks the paper as unpublished or as
# a report/RFC-like document.
#
# NOTE(review): the field list and the condition selecting the
# exempted fields were lost in the mangled source; only the journal
# test and the warning itself are original.  The list
# (author title journal volume number pages year month) and the
# exemption of volume/number/pages are assumptions -- confirm.
sub check_type_article {
    my ( $ent, $hashp ) = @_;
    for my $field (qw(author title journal volume number pages year month)) {
        if ( $field =~ /\A(?:volume|number|pages)\z/ ) {
            my $journal = $ent->get('journal');

            # The escaped bytes are the EUC-JP encoding of the Japanese
            # phrase "tokubetsu kenkyu houkoku"; byte escapes keep the
            # pattern intact whatever encoding this file is saved in.
            next
              if (
                $journal
                and $journal =~
                /(submitted to|to appear in|\xC6\xC3\xCA\xCC\xB8\xA6\xB5\xE6\xCA\xF3\xB9\xF0|internet draft|request for comments)/i
              );
        }
        $ent->warn("missing mandatory field `$field'")
          unless $hashp->{$field};
    }
    return;
}

# Check the mandatory fields of an @inproceedings entry.
sub check_type_inproceedings {
    my ( $ent, $hashp ) = @_;
    _require_fields( $ent, $hashp,
        qw(author title booktitle pages year month) );
    return;
}

# Check the mandatory fields of a @mastersthesis entry.
sub check_type_mastersthesis {
    my ( $ent, $hashp ) = @_;
    _require_fields( $ent, $hashp, qw(author title school year month) );
    return;
}

# Check the mandatory fields of a @book entry.
sub check_type_book {
    my ( $ent, $hashp ) = @_;
    _require_fields( $ent, $hashp, qw(author title publisher year month) );
    return;
}

# Return a copy of $text in which every whitespace character located
# inside a {...} group (at any nesting depth) is replaced by `_', so
# that a brace group is never split into several words.
sub _glue_brace_groups {
    my $text  = shift;
    my $depth = 0;
    my $glued = '';
    for my $ch ( split //, $text ) {
        if ( $ch eq '{' ) {
            $depth++;
        }
        elsif ( $ch eq '}' ) {
            $depth-- if $depth > 0;
        }
        elsif ( $depth > 0 and $ch =~ /\s/ ) {
            $ch = '_';
        }
        $glued .= $ch;
    }
    return $glued;
}

# Warn about words of the title that BibTeX styles may lower-case
# because they are not protected by braces: all-capital words, and --
# when the title looks like it is written in lower case -- any
# capitalized word other than the first one.
sub check_field_title {
    my $ent   = shift;
    my $title = $ent->get('title');
    return unless defined $title;

    # split ' ' ignores leading whitespace, so $words[0] really is the
    # first word of the title.
    my @words = split ' ', _glue_brace_groups($title);

    # Guess whether the title is spelled in lower case: more than half
    # of its words start with a lower-case letter.
    my $lower_words = grep { /^[a-z]/ } @words;
    my $is_lower    = ( $lower_words > @words / 2 );

    for my $i ( 0 .. $#words ) {
        my $word = $words[$i];
        $ent->warn("capital word `$word' must be quoted (e.g., `{TCP}')")
          if (
            $word =~ /^[A-Z][A-Z.-]+$/

            # The first word is exempt by position, not by value, so a
            # later repetition of the first word is still checked.
            or ( $i > 0 and $is_lower and $word =~ /^[A-Z]/ )
          );
    }
    return;
}

# Check the author field.  ASCII author lists must use `and' as the
# separator, `and others' instead of `et al.', no commas, and
# capitalized name parts.  EUC-JP (Japanese) author lists must be
# enclosed in one pair of braces and separated by ", ".
sub check_field_author {
    my $ent    = shift;
    my $author = $ent->get('author');
    my $code   = jcode::getcode( \$author ) || 'ascii';

    if ( $code eq 'ascii' ) {
        $ent->warn("use `and others' instead of `et al.'")
          if ( $author =~ /et al\./ );

        for my $name ( split /\s+and\s+/, $author ) {

            # check misuse of comma
            $ent->warn(
                "invalid author name `$name' (e.g., `John Smith and Mike Johns')"
            ) if $name =~ /,/;

            for my $part ( split /\s+/, $name ) {

                # `others' (from `and others') is the recommended
                # spelling and must not be reported.
                next if $part eq '' or $part eq 'others';
                $ent->warn("non-capitalized author name `$name'")
                  unless $part =~ /^[A-Z]/;
            }
        }
    }
    elsif ( $code eq 'euc' ) {

        # author names in Japanese must be fully quoted
        my ($inner) = $author =~ /^\{(.*)\}$/;
        unless ( defined $inner ) {
            $ent->warn("author names must be enclosed by brackets");
            return;
        }
        return unless $inner;

        for my $name ( split /,\s+/, $inner ) {

            # check lacking space or misuse of touten; \xA1\xA2 is the
            # EUC-JP encoding of the ideographic comma.
            $ent->warn("invalid author name `$name'")
              if $name =~ /(?:,|\xA1\xA2)/;
        }
    }
    else {
        # other character codes are reported by check_file()
    }
    return;
}

# An @inproceedings booktitle must contain `Proceedings of'.
sub check_field_booktitle {
    my $ent   = shift;
    my $type  = $ent->type;
    my $title = $ent->get('booktitle');
    $ent->warn("missing `Proceedings of' in booktitle")
      if ( $type eq 'inproceedings' and $title !~ /Proceedings of/ );
    return;
}

# The pages field must be a single page or a `first--last' range.
#
# NOTE(review): `-' is part of the character class, so a range written
# with a single hyphen (`12-18') is accepted; kept as in the original
# because page labels such as `A-12' may rely on it -- confirm intent.
sub check_field_pages {
    my $ent   = shift;
    my $pages = $ent->get('pages') || '';
    $ent->warn("malformatted page number `$pages' (e.g., `12--18')")
      unless ( $pages =~ /^[A-Za-z\d.-]+(--[A-Za-z\d.-]+)?$/ );
    return;
}

# The month value must be one of the full English month names.
# NOTE(review): presumably the value seen here is the expansion of a
# month macro such as `sep' -- confirm against Text::BibTeX::Bib.
sub check_field_month {
    my $ent   = shift;
    my $month = $ent->get('month') || '';
    $ent->warn("use month macros instead of `$month' (e.g., `sep')")
      unless ( $month =~
        /^(January|February|March|April|May|June|July|August|September|October|November|December)$/
      );
    return;
}

# A URL in the note field must be introduced by `Also available as'
# and wrapped in the \url{} macro.
sub check_field_note {
    my $ent  = shift;
    my $note = $ent->get('note');
    if ( $note =~ m|(https?://[^\s\}]+)| ) {
        my $url = $1;
        $ent->warn("missing `Also available as' before `$url'")
          unless ( $note =~ /Also available as/ );
        $ent->warn("use \\url{} macro for including `$url'")
          unless ( $note =~ /\\url/ );
    }
    return;
}

# A URL in the howpublished field must stand alone (no introductory
# `Also available as/at' phrase) and be wrapped in the \url{} macro.
sub check_field_howpublished {
    my $ent          = shift;
    my $howpublished = $ent->get('howpublished');
    if ( $howpublished =~ m|(https?://[^\s\}]+)| ) {
        my $url = $1;

        # Warn only when the phrase is present, and name the phrase
        # that was actually found.
        if ( $howpublished =~ /((?:Also\s+)?available(?:\s(?:as|at))?)/ ) {
            $ent->warn("remove `$1' before `$url'");
        }
        $ent->warn("use \\url{} macro for including `$url'")
          unless ( $howpublished =~ /\\url/ );
    }
    return;
}

# The citation key must look like `Smith05:Anonymous' or start with
# `RFC'.
sub check_key {
    my $ent = shift;
    my $key = $ent->key;
    $ent->warn("malformatted key `$key' (e.g., `Smith05:Anonymous')")
      unless ( $key =~ /^[0-9A-Za-z_-]+\d\d:[0-9A-Za-z_-]+$/
        or $key =~ /^RFC/ );
    return;
}

# Check every successfully parsed entry of the BibTeX file $file:
# character code, citation key, each field, and the mandatory fields
# of the entry type.
sub check_file {
    my $file = shift;

    # Text::BibTeX::File->new returns false when the file cannot be
    # opened.
    my $bib = Text::BibTeX::File->new($file);
    unless ($bib) {
        warn "$file: cannot open: $ERRNO\n";
        return;
    }

    while ( my $ent = Text::BibTeX::Entry->new($bib) ) {
        next unless $ent->parse_ok;

        my $text = $ent->print_s;
        my $code = jcode::getcode( \$text ) || 'ascii';
        $ent->warn("invalid character code `$code' (must be ascii/euc)")
          unless ( $code eq 'ascii' or $code eq 'euc' );

        check_key($ent);

        # Run the per-field checkers and remember which known fields
        # are present for the mandatory-field check below.
        my %found;
        for my $f ( $ent->fieldlist ) {
            if ( exists $CHECK_FIELD_TBL{$f} ) {
                $CHECK_FIELD_TBL{$f}->($ent);
                $found{$f} = 1;
            }
            else {
                $ent->warn("unknown/unsupported field `$f'");
            }
        }

        my $type = $ent->type;
        if ( exists $CHECK_TYPE_TBL{$type} ) {
            $CHECK_TYPE_TBL{$type}->( $ent, \%found );
        }
        else {
            $ent->warn("unknown/unsupported type `$type'");
        }
    }
    return;
}

getopts('v') or usage();
@ARGV or usage();
for my $file (@ARGV) {
    check_file($file);
}