
The PyLint output marks the erroneous location visually by printing a ^ on the next line. This indicator is always prefixed by multiple spaces, and the line does not contain any further text. Now, it might happen that the indicator sits between two source lines that use the same quote type. Since we detect quotes through our regex, this occurrence would qualify as a quoted string, effectively putting the linter indicator ^ inside quotes. As a consequence of our quote handling, we normally prevent quoted text from being split. However, in the described scenario, this is not desired, since we are not dealing with a regular quote (but an accidental mismatch). Therefore, with this commit, we disable our quote handling for those multi-line quotes where one line fully represents a typical PyLint indicator line. This should restore the desired line-matching behavior. Fixes CODEOCEAN-12N
129 lines
5.4 KiB
Ruby
129 lines
5.4 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
class Runner::Connection::Buffer
|
|
# The WebSocket connection might group multiple lines. For further processing, we require all lines
# to be processed separately. Therefore, we split the lines by each newline character not part of an enclosed
# substring either in single or double quotes (e.g., within a JSON). Originally, each line break consists of `\r\n`.
# We keep the `\r` at the end of the line (keeping "empty" lines) and replace it after buffering.
# Inspired by https://stackoverflow.com/questions/13040585/split-string-by-spaces-properly-accounting-for-quotes-and-backslashes-ruby
#
# One match is a run of the following alternatives (tried in this order):
#   1. `"""..."""`  triple double-quoted text (may span line breaks)
#   2. `"..."`      double-quoted text that is not the start of `"""`
#   3. `'''...'''`  triple single-quoted text (may span line breaks)
#   4. `'...'`      single-quoted text that is not the start of `'''`
#   5. `#` or `\` followed by the rest of the current line
#   6. any character other than `\r` / `\n`, or a `\r` that is directly followed by `\n`
# A quoted alternative never extends across `\r\n`, spaces, `^`, `\r\n` (a line holding nothing but an
# indented caret), so such a line is always emitted as a line of its own.
#
# The opening delimiter of alternative 3 must be `'''` (mirroring alternative 1). With only `''` as opener,
# an empty literal `''` followed by a later `'''` was treated as one quoted span and two lines were merged.
SPLIT_INDIVIDUAL_LINES = /(?:"""(?:\\"|(?!\r\n +\^\r\n)[^"])*"""|"(?!"")(?:\\"|(?!\r\n +\^\r\n)[^"])*"|'''(?:\\'|(?!\r\n +\^\r\n)[^'])*'''|'(?!'')(?:\\'|(?!\r\n +\^\r\n)[^'])*'|[#\\][^\r\n]*|(?:[^\r\n]|\r(?=\n)))+/
|
|
|
|
# Creates an empty buffer: no completed lines are queued, nothing is pending,
# and the buffering flag is cleared.
def initialize
  # Completed lines wait here until they are fetched through `events`.
  @line_buffer = Queue.new
  # Tracks whether the most recent chunk had to be held back as incomplete.
  @buffering = false
  # Unfrozen string collecting data that does not yet form a complete line.
  @global_buffer = +''
  super
end
|
|
|
|
# Takes a newly received chunk of data, joins it with whatever is still pending
# and splits off every part that can already be emitted.
#
# `event_data` is the received string. The return value is the text that remains pending.
def store(event_data)
  # The pending text is either empty (then this just takes over the new data)
  # or an incomplete string that needs the new part appended.
  @global_buffer += event_data
  # Everything valid is moved out of the buffer by `process_and_split`;
  # only the still-invalid remainder is kept in `@global_buffer`.
  @global_buffer = process_and_split(@global_buffer)
end
|
|
|
|
# Drains `@line_buffer` and returns its items as an array, in queue order.
# The queue is empty afterwards.
def events
  # Take the size once; exactly that many items are popped.
  queued = @line_buffer.size
  queued.times.map { @line_buffer.pop }
end
|
|
|
|
# Hands out whatever is still pending in `@global_buffer` and resets the buffer state.
#
# Raises `Error::NotEmpty` when completed lines are still waiting in `@line_buffer`.
# Returns the previously pending string.
def flush
  raise Error::NotEmpty unless @line_buffer.empty?

  # `tap` returns the old buffer object after the state has been reset.
  @global_buffer.tap do
    @buffering = false
    @global_buffer = +''
  end
end
|
|
|
|
# True only when neither completed lines (`@line_buffer`) nor pending data (`@global_buffer`) exist.
def empty?
  [@line_buffer, @global_buffer].all?(&:empty?)
end
|
|
|
|
private
|
|
|
|
# Splits `message_parts` into individual lines, moves every complete line to the line buffer
# and returns the part that still has to be held back.
#
# `message_parts` is the string to examine.
# `stop:` - when true, a held-back part is not examined a second time (ends the recursion).
# Returns the remaining string, which might become the new `@global_buffer`.
def process_and_split(message_parts, stop: false)
  # Temporary working string; it accumulates parts until they form something emittable.
  pending = +''
  # Lines are split by `\n` after normalizing all line endings to `\r\n`. A former line end
  # is therefore recognizable by a trailing `\r` (the `\n` itself is not matched).
  # Everything handed out by this buffer ends up with `\n` line endings.
  normalized_line_endings(message_parts).scan(SPLIT_INDIVIDUAL_LINES).each do |line|
    # Appending is always fine: `pending` is either empty or an incomplete part.
    pending += line

    unless buffering_required_for? pending
      add_to_line_buffer(pending)
      # Start over with a clean working string.
      pending = +''
      next
    end

    @buffering = true
    # Re-examine the accumulated part once: it may contain a valid message after all.
    # Whatever is left after that is kept for further processing.
    pending = process_and_split(pending, stop: true) unless stop
  end
  pending
end
|
|
|
|
# Returns a copy of `string` in which every line ending is represented as `\r\n`.
#
# If `string` itself ends with a bare `\r`, the result ends with `\r` as well (without a `\n`).
def normalized_line_endings(string)
  # First, all line endings are reduced to `\n`, regardless of the original line ending.
  # Then, every `\n` becomes `\r\n` so that the `\r` marks the end of a line.
  # This "double conversion" is required to prevent line endings with \r\r\n.
  normalized = string.encode(universal_newline: true).encode(crlf_newline: true)

  # An input ending with `\r` is incomplete and needs buffering. Through the normalization above,
  # it would now end with `\r\n`, so the final character is dropped again. All other line endings
  # within the string remain unchanged.
  # `end_with?` is the core String method, as used in the rest of this class
  # (`ends_with?` only exists as an ActiveSupport alias).
  string.end_with?("\r") ? normalized[...-1] : normalized
end
|
|
|
|
# Queues `message` as a completed line and resets the pending state
# (`@global_buffer` is emptied and `@buffering` is cleared).
def add_to_line_buffer(message)
  @global_buffer = +''
  @buffering = false
  # While buffering, line breaks were identified by `\n`, which was removed temporarily.
  # Here the trailing `\r` is swapped for `\n` again. A `\r\n` inside the message
  # (e.g., within strings received from Python for the linter) is left as it is.
  @line_buffer << message.gsub(/\r(?!\n)$/, "\n")
end
|
|
|
|
# Decides whether `message` looks incomplete and therefore has to be held back.
# Returns true when buffering is required, false otherwise.
def buffering_required_for?(message)
  # 1. A very short message that starts with `{` (optionally after whitespace).
  return true if message.size <= 5 && message.start_with?(/\s*{/)

  unless valid_json?(message)
    # 2. The beginning of a command, but the JSON is not valid.
    return true if message.start_with?(/\s*{"cmd/)
    # 3. Long messages (e.g., an image or turtle batch commands) that are not valid JSON.
    return true if message.start_with?('<img') || message.include?('"turtlebatch"')
  end

  # 4. An odd number of quotes without a `\r` at the end hints at an incomplete message.
  #    If this does not apply either, the message is not buffered.
  odd_quotes = message.count('"').odd? || message.count("'").odd?
  odd_quotes && !message.end_with?("\r")
end
|
|
|
|
# Reports the current value of the `@buffering` flag, which is set while a part
# is being held back and cleared once a line was queued or the buffer was flushed.
def currently_buffering?
  @buffering
end
|
|
|
|
# Checks whether `data` is a complete JSON message.
# Returns true only if `data` parses as JSON and ends with `\r`; false otherwise.
def valid_json?(data)
  # The parse result itself is not needed; only whether parsing succeeds.
  JSON.parse(data)
rescue JSON::ParserError
  false
else
  # Parsing succeeded. All JSON messages received through the Runner::Connection
  # end in a line break, so the trailing `\r` is required as well.
  data.end_with?("\r")
end
|
|
end
|