156156 HEADER_INDICATORS,
157157)
158158
159- # `Headers` gets called _alot_ (on every line), so we try very hard to be
160- # efficient.
161- function Headers (s)
159+ function parse_single_header (s:: AbstractString )
162160 N = length (s)
163161 x = first (s)
164162 if N == 3
@@ -180,7 +178,7 @@ function Headers(s)
180178 return HEADER_ENDATA
181179 end
182180 elseif N == 7
183- if (x == ' C' || x == ' c' ) && ( uppercase (s) == " COLUMNS" )
181+ if (x == ' C' || x == ' c' ) && uppercase (s) == " COLUMNS"
184182 return HEADER_COLUMNS
185183 elseif (x == ' Q' || x == ' q' )
186184 header = uppercase (s)
@@ -190,34 +188,125 @@ function Headers(s)
190188 return HEADER_QMATRIX
191189 end
192190 end
193- elseif N > = 8
194- if (x == ' O' || x == ' o' ) && startswith ( uppercase (s), " OBJSENSE" )
191+ elseif N = = 8
192+ if (x == ' O' || x == ' o' ) && uppercase (s) == " OBJSENSE"
195193 return HEADER_OBJSENSE
196- elseif (x == ' Q' || x == ' q' )
197- header = uppercase (s)
198- if startswith (header, " QCMATRIX" )
199- return HEADER_QCMATRIX
200- elseif startswith (header, " QSECTION" )
201- return HEADER_QSECTION
194+ end
195+ elseif N == 10
196+ if (x == ' I' || x == ' i' ) && uppercase (s) == " INDICATORS"
197+ return HEADER_INDICATORS
198+ end
199+ end
200+ return HEADER_UNKNOWN
201+ end
202+
"""
    parse_double_header(s::AbstractString)

Classify `s` as one of the section headers that may be followed by a second
item on the same line.

The comparison is case-insensitive. Returns `HEADER_OBJSENSE`,
`HEADER_QCMATRIX`, or `HEADER_QSECTION` when `s` is the corresponding
eight-character keyword, and `HEADER_UNKNOWN` otherwise (including when `s` is
empty).
"""
function parse_double_header(s::AbstractString)
    # Every keyword recognised here is exactly eight characters long. Checking
    # the length first also stops `first` from throwing on an empty string.
    if length(s) != 8
        return HEADER_UNKNOWN
    end
    # Peek at the first character so that `uppercase` (which allocates) is only
    # called when a match is still possible.
    x = first(s)
    if x == 'O' || x == 'o'
        if uppercase(s) == "OBJSENSE"
            return HEADER_OBJSENSE
        end
    elseif x == 'Q' || x == 'q'
        header = uppercase(s)
        if header == "QCMATRIX"
            return HEADER_QCMATRIX
        elseif header == "QSECTION"
            return HEADER_QSECTION
        end
    end
    return HEADER_UNKNOWN
end
220+
"""
    LineToItems(line::String)

Split `line` into whitespace-delimited fields without allocating a vector of
substrings.

We split on any whitespace character rather than only `' '` because at least
one model in MIPLIB uses `\\t` as a separator.

This decision assumes that we are parsing a free MPS file, where whitespace is
disallowed in names. If this ever becomes a problem, we could change to the
fixed MPS format, where the files are split at the usual offsets.

`nfields` counts every field found in `line`, but only the index ranges of the
first five fields are stored in `fields`.

This is a more performant version of:
```julia
LineToItems(line::String) = split(line, r"\\s"; keepempty = false)
```
"""
struct LineToItems
    line::String
    nfields::Int
    fields::NTuple{5,UnitRange{Int}}

    function LineToItems(line::String)
        nfields, f1, f2, f3, f4, f5 = 0, 0:0, 0:0, 0:0, 0:0, 0:0
        # `start` and `stop` are the indices of the first and last character of
        # the field currently being scanned.
        start, stop, in_field = -1, -1, false
        # Iterate over valid character indices, not raw code units: indexing a
        # `String` at a UTF-8 continuation byte throws a `StringIndexError`.
        for i in eachindex(line)
            if !isspace(line[i])
                if !in_field
                    start = i
                    in_field = true
                end
                # Track the last character of the field so that the stored
                # range always ends on a valid index for `SubString`.
                stop = i
                continue
            end
            if in_field
                # Whitespace terminates the current field.
                nfields += 1
                if nfields == 1
                    f1 = start:stop
                elseif nfields == 2
                    f2 = start:stop
                elseif nfields == 3
                    f3 = start:stop
                elseif nfields == 4
                    f4 = start:stop
                elseif nfields == 5
                    f5 = start:stop
                end
                in_field = false
            end
        end
        if in_field
            # The line ended in the middle of a field.
            nfields += 1
            if nfields == 1
                f1 = start:stop
            elseif nfields == 2
                f2 = start:stop
            elseif nfields == 3
                f3 = start:stop
            elseif nfields == 4
                f4 = start:stop
            elseif nfields == 5
                f5 = start:stop
            end
        end
        return new(line, nfields, (f1, f2, f3, f4, f5))
    end
end
211284
212- function line_to_items (line)
213- # Split on any whitespace characters. We can't split only on `' '` because
214- # at least one models in MIPLIB has `\t` as a separator.
215- #
216- # This decision assumes that we are parsing a free MPS file, where
217- # whitespace is disallowed in names. If this ever becomes a problem, we
218- # could change to the fixed MPS format, where the files are split at the
219- # usual offsets.
220- return split (line, r" \s " ; keepempty = false )
"""
    Base.length(x::LineToItems)

Return the number of whitespace-delimited fields that were found in the line.
"""
function Base.length(x::LineToItems)
    return x.nfields
end
286+
"""
    Base.getindex(x::LineToItems, i::Int)

Return the `i`-th field of `x` as a `SubString` of the original line.

Throws a `BoundsError` if `i` is not the index of a stored field.
"""
function Base.getindex(x::LineToItems, i::Int)
    # Use an explicit check instead of `@assert`: assertions may be disabled,
    # and `nfields` can exceed the number of ranges actually stored in
    # `fields`.
    if !(1 <= i <= min(x.nfields, length(x.fields)))
        throw(BoundsError(x, i))
    end
    return SubString(x.line, x.fields[i])
end
291+
# Iteration protocol for `LineToItems`. The state `i` is the index of the next
# field to yield; iteration starts at the first field and stops once `i` passes
# `nfields`.
function Base.iterate(x::LineToItems, i = 1)
    return i > x.nfields ? nothing : (x[i], i + 1)
end
300+
# `parse_header` is called a lot (once for every line), so it has to be cheap:
# it branches on the number of items and only ever inspects the first one.
function parse_header(s::LineToItems)
    n_items = length(s)
    if n_items == 1
        return parse_single_header(s[1])
    end
    return n_items == 2 ? parse_double_header(s[1]) : HEADER_UNKNOWN
end
222311
223312"""
@@ -237,13 +326,12 @@ function Base.read!(io::IO, model::Model{T}) where {T}
237326 if startswith (data. contents, ' *' )
238327 continue # Lines starting with `*` are comments
239328 end
240- line = string ( strip ( data. contents) )
241- if isempty (line)
329+ items = LineToItems ( data. contents)
330+ if length (items) == 0
242331 continue # Skip blank lines
243332 end
244- h = Headers (line )
333+ h = parse_header (items )
245334 if h == HEADER_OBJSENSE
246- items = line_to_items (line)
247335 if length (items) == 2
248336 sense = uppercase (items[2 ])
249337 if ! (sense in (" MIN" , " MAX" ))
@@ -258,7 +346,6 @@ function Base.read!(io::IO, model::Model{T}) where {T}
258346 end
259347 continue
260348 elseif h == HEADER_QCMATRIX || h == HEADER_QSECTION
261- items = line_to_items (line)
262349 if length (items) != 2
263350 _throw_parse_error (
264351 data,
@@ -274,10 +361,8 @@ function Base.read!(io::IO, model::Model{T}) where {T}
274361 continue
275362 end
276363 # Otherwise, carry on with the previous header
277- # TODO : split into hard fields based on column indices.
278- items = line_to_items (line)
279364 if header == HEADER_NAME
280- parse_name_line (data, line )
365+ parse_name_line (data)
281366 elseif header == HEADER_OBJSENSE
282367 sense = uppercase (only (items))
283368 if ! (sense in (" MIN" , " MAX" ))
490575# NAME
491576# ==============================================================================
492577
493- function parse_name_line (data:: TempMPSModel , line )
494- m = match (r" ^\s *NAME(.*)" i , line )
578+ function parse_name_line (data:: TempMPSModel )
579+ m = match (r" ^\s *NAME(.*)" i , data . contents )
495580 if m === nothing
496581 _throw_parse_error (
497582 data,
506591# ROWS
507592# ==============================================================================
508593
509- function parse_rows_line (data:: TempMPSModel{T} , items:: Vector ) where {T}
594+ function parse_rows_line (data:: TempMPSModel{T} , items) where {T}
510595 if length (items) < 2
511596 _throw_parse_error (
512597 data,
@@ -619,7 +704,7 @@ function _set_intorg(data::TempMPSModel{T}, column, column_name) where {T}
619704 return
620705end
621706
622- function parse_columns_line (data:: TempMPSModel{T} , items:: Vector ) where {T}
707+ function parse_columns_line (data:: TempMPSModel{T} , items) where {T}
623708 if length (items) == 3
624709 # [column name] [row name] [value]
625710 column_name, row_name, value = items
657742# RHS
658743# ==============================================================================
659744
660- function parse_single_rhs (data, row_name, value, items:: Vector )
745+ function parse_single_rhs (data, row_name, value, items)
661746 if row_name == data. obj_name
662747 data. obj_constant = value
663748 return
@@ -688,7 +773,7 @@ function parse_single_rhs(data, row_name, value, items::Vector)
688773end
689774
690775# TODO : handle multiple RHS vectors.
691- function parse_rhs_line (data:: TempMPSModel{T} , items:: Vector ) where {T}
776+ function parse_rhs_line (data:: TempMPSModel{T} , items) where {T}
692777 if length (items) == 3
693778 # [rhs name] [row name] [value]
694779 rhs_name, row_name, value = items
@@ -744,7 +829,7 @@ function parse_single_range(data, row_name, value)
744829end
745830
746831# TODO : handle multiple RANGES vectors.
747- function parse_ranges_line (data:: TempMPSModel{T} , items:: Vector ) where {T}
832+ function parse_ranges_line (data:: TempMPSModel{T} , items) where {T}
748833 if length (items) == 3
749834 # [rhs name] [row name] [value]
750835 _, row_name, value = items
@@ -859,7 +944,7 @@ function _parse_single_bound(
859944 end
860945end
861946
862- function parse_bounds_line (data:: TempMPSModel{T} , items:: Vector ) where {T}
947+ function parse_bounds_line (data:: TempMPSModel{T} , items) where {T}
863948 if length (items) == 3
864949 bound_type, _, column_name = items
865950 _parse_single_bound (data, column_name, bound_type)
0 commit comments