Skip to content

Commit d86ff3a

Browse files
authored
Merge pull request #79 from code4lib/non_integer_resumption_token_last_id
non-integer identifiers work in ResumptionToken
2 parents b01c9ec + f66ade5 commit d86ff3a

File tree

5 files changed

+87
-19
lines changed

5 files changed

+87
-19
lines changed

lib/oai/provider/model/activerecord_wrapper.rb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -159,13 +159,13 @@ def select_partial(find_scope, token)
159159
# the last 'id' of the previous set is used as the
160160
# filter to the next set.
161161
def token_conditions(token)
162-
last = token.last
162+
last_id = token.last_str
163163
sql = sql_conditions token.to_conditions_hash
164164

165-
return sql if 0 == last
165+
return sql if "0" == last_id
166166
# Now add last id constraint
167167
sql.first << " AND #{identifier_field} > :id"
168-
sql.last[:id] = last
168+
sql.last[:id] = last_id
169169

170170
return sql
171171
end

lib/oai/provider/resumption_token.rb

Lines changed: 52 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,41 @@ module OAI::Provider
88
# The ResumptionToken class forms the basis of paging query results. It
99
# provides several helper methods for dealing with resumption tokens.
1010
#
11+
# OAI-PMH spec does not specify anything about resumptionToken format, they can
12+
# be purely opaque tokens.
13+
#
14+
# Our implementation however encodes everything needed to construct the next page
15+
# inside the resumption token.
16+
#
17+
# == The 'last' component: offset or ID/pk to resume from
18+
#
19+
# The `#last` component is an offset or ID to resume from. In the case of it being
20+
# an ID to resume from, this assumes that ID's are sortable and results are returned
21+
# in ID order, so that the 'last' ID can be used as the place to resume from.
22+
#
23+
# Originally it was assumed that #last was always an integer, but since existing
24+
# implementations (like ActiveRecordWrapper) used it as an ID, and identifiers and
25+
# primary keys are _not_ always integers (can be UUID etc), we have expanded to allow
26+
# any string value.
27+
#
28+
# However, for backwards compatibility #last always returns an integer (sometimes 0 if
29+
# actual last component is not an integer), and #last_str returns the full string version.
30+
# Trying to change #last itself to be string broke a lot of existing code in this gem
31+
# in mysterious ways.
32+
#
33+
# Also beware that in some cases the value 0/"0" seems to be a special value used
34+
# to signify some special case. A lot of "code archeology" going on here after significant
35+
# period of no maintenance to this gem.
1136
class ResumptionToken
12-
attr_reader :prefix, :set, :from, :until, :last, :expiration, :total
37+
attr_reader :prefix, :set, :from, :until, :last, :last_str, :expiration, :total
1338

1439
# parses a token string and returns a ResumptionToken
1540
def self.parse(token_string)
1641
begin
1742
options = {}
18-
matches = /(.+):(\d+)$/.match(token_string)
19-
options[:last] = matches.captures[1].to_i
20-
43+
matches = /(.+):([^ :]+)$/.match(token_string)
44+
options[:last] = matches.captures[1]
45+
2146
parts = matches.captures[0].split('.')
2247
options[:metadata_prefix] = parts.shift
2348
parts.each do |part|
@@ -35,7 +60,7 @@ def self.parse(token_string)
3560
raise OAI::ResumptionTokenException.new
3661
end
3762
end
38-
63+
3964
# extracts the metadata prefix from a token string
4065
def self.extract_format(token_string)
4166
return token_string.split('.')[0]
@@ -44,32 +69,32 @@ def self.extract_format(token_string)
4469
def initialize(options, expiration = nil, total = nil)
4570
@prefix = options[:metadata_prefix]
4671
@set = options[:set]
47-
@last = options[:last]
72+
self.last = options[:last]
4873
@from = options[:from] if options[:from]
4974
@until = options[:until] if options[:until]
5075
@expiration = expiration if expiration
5176
@total = total if total
5277
end
53-
78+
5479
# convenience method for setting the offset of the next set of results
5580
def next(last)
56-
@last = last
81+
self.last = last
5782
self
5883
end
59-
84+
6085
def ==(other)
6186
prefix == other.prefix and set == other.set and from == other.from and
62-
self.until == other.until and last == other.last and
87+
self.until == other.until and last == other.last and
6388
expiration == other.expiration and total == other.total
6489
end
65-
90+
6691
# output an xml resumption token
6792
def to_xml
6893
xml = Builder::XmlMarkup.new
6994
xml.resumptionToken(encode_conditions, hash_of_attributes)
7095
xml.target!
7196
end
72-
97+
7398
# return a hash containing just the model selection parameters
7499
def to_conditions_hash
75100
conditions = {:metadata_prefix => self.prefix }
@@ -78,20 +103,31 @@ def to_conditions_hash
78103
conditions[:until] = self.until if self.until
79104
conditions
80105
end
81-
82-
# return the a string representation of the token minus the offset
106+
107+
# return the a string representation of the token minus the offset/ID
108+
#
109+
# Q: Why does it eliminate the offset/id "last" on the end? Doesn't fully
110+
# represent state without it, which is confusing. Not sure, but
111+
# other code seems to rely on it, tests break if not.
83112
def to_s
84113
encode_conditions.gsub(/:\w+?$/, '')
85114
end
86115

87116
private
88-
117+
118+
# take care of our logic to store an integer and a str version, for backwards
119+
# compat where it was assumed to be an integer, as well as supporting string.
120+
def last=(value)
121+
@last = value.to_i
122+
@last_str = value.to_s
123+
end
124+
89125
def encode_conditions
90126
encoded_token = @prefix.to_s.dup
91127
encoded_token << ".s(#{set})" if set
92128
encoded_token << ".f(#{self.from.utc.xmlschema})" if self.from
93129
encoded_token << ".u(#{self.until.utc.xmlschema})" if self.until
94-
encoded_token << ":#{last}"
130+
encoded_token << ":#{last_str}"
95131
end
96132

97133
def hash_of_attributes

test/activerecord_provider/helpers/providers.rb

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,13 @@ class SimpleResumptionProvider < OAI::Provider::Base
3535
source_model ActiveRecordWrapper.new(DCField, :limit => 25)
3636
end
3737

38+
class SimpleResumptionProviderWithNonIntegerID < OAI::Provider::Base
39+
repository_name 'ActiveRecord Resumption Provider With Non-Integer ID'
40+
repository_url 'http://localhost'
41+
record_prefix 'oai:test'
42+
source_model ActiveRecordWrapper.new(DCField, :limit => 25, identifier_field: "source")
43+
end
44+
3845
class CachingResumptionProvider < OAI::Provider::Base
3946
repository_name 'ActiveRecord Caching Resumption Provider'
4047
repository_url 'http://localhost'

test/activerecord_provider/tc_simple_paging_provider.rb

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,38 @@ def test_full_harvest
88
assert_not_nil doc.elements["/OAI-PMH/ListRecords/resumptionToken"]
99
assert_equal 26, doc.elements["/OAI-PMH/ListRecords"].to_a.size
1010
token = doc.elements["/OAI-PMH/ListRecords/resumptionToken"].text
11+
1112
doc = Document.new(@provider.list_records(:resumption_token => token))
1213
assert_not_nil doc.elements["/OAI-PMH/ListRecords/resumptionToken"]
1314
token = doc.elements["/OAI-PMH/ListRecords/resumptionToken"].text
1415
assert_equal 26, doc.elements["/OAI-PMH/ListRecords"].to_a.size
16+
1517
doc = Document.new(@provider.list_records(:resumption_token => token))
1618
assert_not_nil doc.elements["/OAI-PMH/ListRecords/resumptionToken"]
1719
token = doc.elements["/OAI-PMH/ListRecords/resumptionToken"].text
1820
assert_equal 26, doc.elements["/OAI-PMH/ListRecords"].to_a.size
21+
1922
doc = Document.new(@provider.list_records(:resumption_token => token))
2023
assert_nil doc.elements["/OAI-PMH/ListRecords/resumptionToken"]
2124
assert_equal 25, doc.elements["/OAI-PMH/ListRecords"].to_a.size
2225
end
2326

27+
def test_non_integer_identifiers_resumption
28+
@provider = SimpleResumptionProviderWithNonIntegerID.new
29+
30+
doc = Document.new(@provider.list_records(:metadata_prefix => 'oai_dc'))
31+
assert_not_nil doc.elements["/OAI-PMH/ListRecords/resumptionToken"]
32+
assert_equal 26, doc.elements["/OAI-PMH/ListRecords"].to_a.size
33+
token = doc.elements["/OAI-PMH/ListRecords/resumptionToken"].text
34+
35+
next_doc = Document.new(@provider.list_records(:resumption_token => token))
36+
assert_not_nil next_doc.elements["/OAI-PMH/ListRecords/resumptionToken"]
37+
next_token = next_doc.elements["/OAI-PMH/ListRecords/resumptionToken"].text
38+
assert_equal 26, next_doc.elements["/OAI-PMH/ListRecords"].to_a.size
39+
40+
assert_not_equal token, next_token
41+
end
42+
2443
def test_from_and_until
2544
first_id = DCField.order("id asc").first.id
2645
DCField.where("id < #{first_id + 25}").update_all(updated_at: Time.parse("September 15 2005"))

test/provider/tc_resumption_tokens.rb

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,4 +43,10 @@ def test_resumption_token_to_xml
4343
assert_equal "#{@token.to_s}:#{@token.last}", doc.elements['/resumptionToken'].text
4444
end
4545

46+
def test_resumption_token_id_does_not_need_to_be_numeric
47+
serialized = "oai_dc.s(A).f(2005-01-01T17:00:00Z).u(2005-01-31T17:00:00Z):FA129C"
48+
49+
token = ResumptionToken.parse(serialized)
50+
assert_equal serialized, token.send(:encode_conditions)
51+
end
4652
end

0 commit comments

Comments
 (0)