Skip to content
This repository was archived by the owner on Sep 20, 2021. It is now read-only.

Commit 2ee0c67

Browse files
GiorgosPa (Georgios Papoutsakis)
authored and
Georgios Papoutsakis
committed
BibCheck: adds a new plugin and unit tests
* Adds journal_names plugin. * Adds unit tests for the new plugin and atlas_authors plugin. * Fixes pep8 issues on atlas_authors. Signed-off-by: Georgios Papoutsakis <[email protected]>
1 parent 917ae0c commit 2ee0c67

File tree

4 files changed

+193
-2
lines changed

4 files changed

+193
-2
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
# -*- coding: utf-8 -*-
2+
##
3+
## This file is part of INSPIRE.
4+
## Copyright (C) 2014 CERN.
5+
##
6+
## Invenio is free software; you can redistribute it and/or
7+
## modify it under the terms of the GNU General Public License as
8+
## published by the Free Software Foundation; either version 2 of the
9+
## License, or (at your option) any later version.
10+
##
11+
## Invenio is distributed in the hope that it will be useful, but
12+
## WITHOUT ANY WARRANTY; without even the implied warranty of
13+
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
## General Public License for more details.
15+
##
16+
## You should have received a copy of the GNU General Public License
17+
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
18+
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
19+
20+
21+
"""BibTask Inspires plugins Test Suite."""
22+
23+
__revision__ = "$Id$"
24+
25+
from invenio.testutils import make_test_suite, run_test_suite, InvenioTestCase
26+
from invenio.bibcheck_plugins import atlas_authors, \
27+
journal_names
28+
from invenio.bibcheck_task import AmendableRecord
29+
from invenio.bibrecord import record_add_field
30+
31+
# Record fixture shared by the plugin tests.  Each tag maps to a list of
# field instances; each instance is presumably the bibrecord tuple
# (subfields, ind1, ind2, controlfield value, field position) -- confirm
# against invenio.bibrecord.
MOCK_RECORD = {
    '001': [([], ' ', ' ', '1', 7)],
    '005': [([], ' ', ' ', '20130621172205.0', 7)],
    '100': [([('a', 'Photolab '),
              ('c', '')], ' ', ' ', '', 7)],  # Trailing spaces
    '245': [([('a', ''), ('b', '')], ' ', ' ', '', 7)],  # remove-empty-fields
    '260': [([('c', '2000-06-14')], ' ', ' ', '', 7)],
    '261': [([('c', '14 Jun 2000')], ' ', ' ', '', 7)],
    '262': [([('c', '14 06 00')], ' ', ' ', '', 7)],
    '263': [([('c', '2000 06 14')], ' ', ' ', '', 7)],
    '264': [([('c', '1750 06 14')], ' ', ' ', '', 7)],
    '265': [([('c', '2100 06 14')], ' ', ' ', '', 7)],
    '340': [([('a', 'FI\xc3\x28LM')], ' ', ' ', '', 7)],  # Invalid utf-8
    '595': [([('a', ' Press')], ' ', ' ', '', 7)],  # Leading spaces
    '653': [([('a', 'LEP')], '1', ' ', '', 7)],
    '700': [([('a', 'Bella, Ludovica Aperio'),
              ('c', '')], ' ', ' ', '', 7)],  # remove-empty-fields
    '856': [([('f', '[email protected]')], '0', ' ', '', 7)],
    '994': [([('u', 'http://httpstat.us/200')], '4', ' ', '', 7)],  # Url that works
    '995': [([('u', 'www.google.com/favicon.ico')], '4', ' ', '', 7)],  # url without protocol
    '996': [([('u', 'httpstat.us/301')], '4', ' ', '', 7)],  # redirection without protocol
    '997': [([('u', 'http://httpstat.us/404')], '4', ' ', '', 7)],  # Error 404
    '998': [([('u', 'http://httpstat.us/500')], '4', ' ', '', 7)],  # Error 500
    # A dict literal keeps only the last value given for a repeated key, so
    # all '999' instances (the 9994_ URL and the 999C5 references) must live
    # in a single list; otherwise the URL instance is silently dropped.
    '999': [
        ([('u', 'http://httpstat.us/301')], '4', ' ', '', 7),  # Permanent redirect
        ([('a', '5345435'),
          ('i', '52345235'),
          ('r', '4243424'),
          ('s', 'fsdf.gfdfgsdfg.'),
          ('0', '2')], 'C', '5', '', 7),
        ([('a', 'mplampla')], 'C', '5', '', 8),
    ],
}

# Rule configuration handed to AmendableRecord.set_rule() by the tests.
RULE_MOCK = {
    "name": "test_rule",
    "holdingpen": True
}
67+
68+
69+
class BibCheckInspirePluginsTest(InvenioTestCase):
    """ Unit tests for the INSPIRE-specific BibCheck plugins """

    def assertAmends(self, test, changes, **kwargs):
        """
        Assert that the plugin "test" amends the mock record when called with
        params kwargs.

        "changes" maps a field specification (e.g. '700__a') to the value
        expected in the first matching subfield after the plugin has run.  A
        value of None only requires that exactly one matching subfield
        exists.  The number of recorded amendments must equal len(changes).
        """
        record = AmendableRecord(MOCK_RECORD)
        record.set_rule(RULE_MOCK)
        test.check_record(record, **kwargs)
        self.assertTrue(record.amended)
        self.assertEqual(len(record.amendments), len(changes))
        for field, val in changes.items():
            if val is not None:
                self.assertEqual(
                    ((field, 0, 0), val),
                    list(record.iterfield(field))[0]
                )
            else:
                self.assertEqual(len(list(record.iterfield(field))), 1)

    def _journal_names_validity(self, journal, reference):
        """
        Run the journal_names plugin on a freshly built record and return
        the record's "valid" attribute.

        The record holds "journal" in 773__p, "reference" in 999C5s and the
        control number '111' in 001.
        """
        rec = {}
        record_add_field(rec, '773', subfields=[('p', journal)])
        record_add_field(rec, '001', controlfield_value='111')
        record_add_field(rec, '999', ind1='C', ind2='5',
                         subfields=[('s', reference)])
        rec = AmendableRecord(rec)
        rec.set_rule(RULE_MOCK)
        journal_names.check_records([rec])
        return rec.valid

    def test_atlas_authors(self):
        """ atlas_authors plugin test """
        self.assertAmends(atlas_authors, {'700__a': 'Aperio Bella, Ludovica'})

    def test_journal_names(self):
        """ journal_names plugin test """
        validity = self._journal_names_validity

        # Both fields carry the JHEP journal and 999C5s is jnl,vol,page
        self.assertEqual(validity('JHEP', 'JHEP,a,b'), True)

        # 773__p JHEP2 journal does not exist
        self.assertEqual(validity('JHEP2', 'JHEP,a,b'), False)

        # 999C5s has 3 commas, i.e. four parts instead of jnl,vol,page
        self.assertEqual(validity('JHEP', 'JHEP,a,b,c'), False)

        # 999C5s JHEP2 journal does not exist
        self.assertEqual(validity('JHEP', 'JHEP2,a,b'), False)

        # 773__p only the journal name must be in this field
        self.assertEqual(validity('JHEP,a,b', 'JHEP,a,b'), False)
145+
146+
# Suite built from the BibCheckInspirePluginsTest test case.
TEST_SUITE = make_test_suite(BibCheckInspirePluginsTest)

if __name__ == "__main__":
    # Only run the suite when this module is executed as a script.
    run_test_suite(TEST_SUITE)

bibcheck/plugins/atlas_authors.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#!/usr/bin/env python
2-
32
##
4-
## This file is part of Invenio.
3+
## This file is part of INSPIRE.
54
## Copyright (C) 2014 CERN.
65
##
76
## Invenio is free software; you can redistribute it and/or

bibcheck/plugins/journal_names.py

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
## This file is part of INSPIRE.
4+
## Copyright (C) 2014 CERN.
5+
##
6+
## Invenio is free software; you can redistribute it and/or
7+
## modify it under the terms of the GNU General Public License as
8+
## published by the Free Software Foundation; either version 2 of the
9+
## License, or (at your option) any later version.
10+
##
11+
## Invenio is distributed in the hope that it will be useful, but
12+
## WITHOUT ANY WARRANTY; without even the implied warranty of
13+
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
## General Public License for more details.
15+
##
16+
## You should have received a copy of the GNU General Public License
17+
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
18+
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
19+
20+
from invenio.bibcheck_task import AmendableRecord
21+
from invenio.refextract_kbs import get_kbs
22+
23+
24+
def check_records(records):
    """
    Bibcheck plugin to check if the journal names are in INSPIRE's short
    form and if the field 999C5s is in the form jnl,vol,page.

    Every 999C5s value is split on commas: it must have exactly three parts
    and its first part must be a known journal short name.  Every 773__p
    value must itself be a known journal short name.  Each violation is
    reported through record.set_invalid().

    @param records: iterable of records providing iterfield() and
        set_invalid()
    """
    # Build the lookup once as a set: membership is tested for every
    # 999C5s and 773__p value of every record, and scanning a list each
    # time is needlessly linear.
    # NOTE(review): assumes the knowledge base values are plain (hashable)
    # strings -- confirm against invenio.refextract_kbs.get_kbs.
    journals = frozenset(get_kbs()['journals'][1].values())
    for record in records:
        for _position, value in record.iterfield('999C5s'):
            values = value.split(',')
            name = values[0]
            if len(values) != 3:
                record.set_invalid('value in field 999C5s is not in the form jnl,vol,page')
            # The journal name is checked even when the form is wrong, so a
            # malformed reference can be reported twice.
            if name not in journals:
                record.set_invalid('value in field 999C5s: %s not a valid journal short name' % name)
        for _position, value in record.iterfield('773__p'):
            if value not in journals:
                record.set_invalid('value in field 773__p: %s not a valid journal short name' % value)

bibcheck/rules.cfg

+4
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,7 @@ check = earliest_date
2929
#check = atlas_authors
3030
#check.kb_file = path_to_file
3131
#filter_collection = HEP
32+
33+
#[journal_names]
34+
#check = journal_names
35+
#filter_collection = HEP

0 commit comments

Comments
 (0)