# frozen_string_literal: true class Runner::Connection::Buffer # The WebSocket connection might group multiple lines. For further processing, we require all lines # to be processed separately. Therefore, we split the lines by each newline character not part of an enclosed # substring either in single or double quotes (e.g., within a JSON). Originally, each line break consists of `\r\n`. # We keep the `\r` at the end of the line (keeping "empty" lines) and replace it after buffering. # Inspired by https://stackoverflow.com/questions/13040585/split-string-by-spaces-properly-accounting-for-quotes-and-backslashes-ruby SPLIT_INDIVIDUAL_LINES = /(?:"""(?:\\"|[^"])*"""|"(?!"")(?:\\"|[^"])*"|''(?:\\'|[^'])*'''|'(?!'')(?:\\'|[^'])*'|(?:[^\r\n]|\r(?=\n)))+/ def initialize @global_buffer = +'' @buffering = false @line_buffer = Queue.new super end def store(event_data) # First, we append the new data to the existing `@global_buffer`. # Either, the `@global_buffer` is empty and this is a NO OP. # Or, the `@global_buffer` contains an incomplete string and thus requires the new part. @global_buffer += event_data # We process the full `@global_buffer`. Valid parts are removed from the buffer and # the remaining invalid parts are still stored in `@global_buffer`. @global_buffer = process_and_split @global_buffer end def events # Return all items from `@line_buffer` in an array (which is iterable) and clear the queue Array.new(@line_buffer.size) { @line_buffer.pop } end def flush raise Error::NotEmpty unless @line_buffer.empty? remaining_buffer = @global_buffer @buffering = false @global_buffer = +'' remaining_buffer end def empty? @line_buffer.empty? && @global_buffer.empty? end private def process_and_split(message_parts, stop: false) # We need a temporary buffer to operate on buffer = +'' # We split lines by `\n` and want to normalize them to be separated by `\r\n`. # This allows us to identify a former line end with `\r` (as the `\n` is not matched) # All results returned from this buffer are normalized to feature `\n` line endings. normalized_line_endings(message_parts).scan(SPLIT_INDIVIDUAL_LINES).each do |line| # Same argumentation as above: We can always append (previous empty or invalid) buffer += line if buffering_required_for? buffer @buffering = true # Check the existing substring `buffer` if it contains a valid message. # The remaining buffer is stored for further processing. buffer = process_and_split buffer, stop: true unless stop else add_to_line_buffer(buffer) # Clear the current buffer. buffer = +'' end end # Return the remaining buffer which might become the `@global_buffer` buffer end def normalized_line_endings(string) # First, we ensure line endings are only represented by `\n`, regardless of the original line ending. # Then, we convert all line endings to `\r\n` to ensure we can identify the `\r` at the end of a line. # This "double conversion" is required to prevent line endings with \r\r\n. normalized = string.encode(universal_newline: true).encode(crlf_newline: true) # If the original input string ends with `\r`, it is incomplete and needs buffering. # However, through our above normalization, the string would not end with `\r` anymore (but `\r\n`). # Hence, in this case, we just remove the last character, so that all other line endings within the string remain unchanged. if string.ends_with?("\r") normalized[...-1] else normalized end end def add_to_line_buffer(message) @buffering = false @global_buffer = +'' # For our buffering, we identified line breaks with the `\n` and removed those temporarily. # Thus, we now re-add the `\n` at the end of the string and remove the `\r` at the same time. # Still, some messages might still contain a `\r\n` within strings (e.g., received from Python for the linter). message = message.gsub(/\r(?!\n)$/, "\n") @line_buffer.push message end def buffering_required_for?(message) # First, check if the message is very short and start with { return true if message.size <= 5 && message.start_with?(/\s*{/) invalid_json = !valid_json?(message) # Second, if we have the beginning of a valid command but an invalid JSON return true if invalid_json && message.start_with?(/\s*{"cmd/) # Third, buffer the message if it contains long messages (e.g., an image or turtle batch commands) return true if invalid_json && (message.start_with?('