291 lines
10 KiB
C++
291 lines
10 KiB
C++
/*
 * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2.0,
 * as published by the Free Software Foundation.
 *
 * This program is also distributed with certain software (including
 * but not limited to OpenSSL) that is licensed under separate terms,
 * as designated in a particular file or component or in included license
 * documentation. The authors of MySQL hereby grant you an additional
 * permission to link the program and your derivative works with the
 * separately licensed software that they have included with MySQL.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License, version 2.0, for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
|
|
//--------------------------------------------------------------------------------------------------
|
|
|
|
#include <vector>
|
|
#include <stack>
|
|
|
|
#include "utils_string_parsing.h"
|
|
#include "template_utils.h"
|
|
|
|
namespace shcore {
|
|
namespace mysql {
|
|
namespace splitter {
|
|
//--------------------------------------------------------------------------------------------------
|
|
|
|
/**
 * Advances past leading whitespace in the half-open byte range [head, tail).
 *
 * Every byte whose value is less than or equal to the ASCII space character
 * counts as whitespace here, so tabs, line breaks and other control bytes are
 * skipped as well.
 *
 * @param head First byte of the range to examine.
 * @param tail One past the last byte of the range; it is never dereferenced.
 * @return Pointer to the first byte in the range that is greater than ' ',
 *         or `tail` when the range holds nothing but whitespace (or is empty).
 */
const unsigned char *skip_leading_whitespace(const unsigned char *head,
                                             const unsigned char *tail) {
  while (head < tail && *head <= ' ') ++head;
  return head;
}
|
|
|
|
//--------------------------------------------------------------------------------------------------
|
|
|
|
/**
 * Tells whether the text at `head` starts with the given line break sequence.
 *
 * Both arguments are read as NUL-terminated byte strings: the comparison stops
 * at the first mismatch or at the end of `line_break`. A NUL in the text ends
 * the comparison too, because it can never equal a (non-NUL) pattern byte.
 *
 * @param head       NUL-terminated text to examine.
 * @param line_break NUL-terminated line break sequence (for example "\n" or
 *                   "\r\n").
 * @return true only if `line_break` is non-empty and `head` begins with the
 *         complete sequence; false for a null pointer, an empty sequence or a
 *         partial match.
 */
bool is_line_break(const unsigned char *head, const unsigned char *line_break) {
  // An absent or empty pattern never matches.
  if (head == nullptr || line_break == nullptr || *line_break == '\0')
    return false;

  while (*line_break != '\0' && *head == *line_break) {
    ++head;
    ++line_break;
  }

  // The whole pattern was consumed only if every byte of it matched.
  return *line_break == '\0';
}
|
|
|
|
//--------------------------------------------------------------------------------------------------
|
|
|
|
/**
|
|
* A statement splitter to take a list of sql statements and split them into
|
|
*individual statements,
|
|
* return their position and length in the original string (instead the copied
|
|
*strings).
|
|
*
|
|
* A tweak was added to the function to return the number of complete statements
|
|
*found, where
|
|
* complete means the ending delimiter was found.
|
|
*/
|
|
size_t determineStatementRanges(const char *sql, size_t length,
|
|
std::string &delimiter,
|
|
std::vector<std::pair<size_t, size_t>> &ranges,
|
|
const std::string &line_break,
|
|
std::stack<std::string> &input_context_stack) {
|
|
int full_statement_count = 0;
|
|
const unsigned char *delimiter_head =
|
|
pointer_cast<const unsigned char *>(delimiter.c_str());
|
|
|
|
const unsigned char keyword[] = "delimiter";
|
|
|
|
const unsigned char *head = pointer_cast<const unsigned char *>(sql);
|
|
const unsigned char *tail = head;
|
|
const unsigned char *end = head + length;
|
|
const unsigned char *new_line =
|
|
pointer_cast<const unsigned char *>(line_break.c_str());
|
|
bool have_content = false; // Set when anything else but comments were found
|
|
// for the current statement.
|
|
|
|
ranges.clear();
|
|
|
|
while (tail < end) {
|
|
switch (*tail) {
|
|
case '*': // Comes from a multiline comment and comment is done
|
|
if (*(tail + 1) == '/' && (!input_context_stack.empty() &&
|
|
input_context_stack.top() == "/*")) {
|
|
if (!input_context_stack.empty()) input_context_stack.pop();
|
|
|
|
tail += 2;
|
|
head = tail; // Skip over the comment.
|
|
}
|
|
break;
|
|
case '/': // Possible multi line comment or hidden (conditional) command.
|
|
if (*(tail + 1) == '*') {
|
|
tail += 2;
|
|
bool is_hidden_command = (*tail == '!');
|
|
while (true) {
|
|
while (tail < end && *tail != '*') tail++;
|
|
if (tail == end) // Unfinished comment.
|
|
{
|
|
input_context_stack.push("/*");
|
|
break;
|
|
} else {
|
|
if (*++tail == '/') {
|
|
tail++; // Skip the slash too.
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!is_hidden_command && !have_content)
|
|
head = tail; // Skip over the comment.
|
|
}
|
|
break;
|
|
|
|
case '-': // Possible single line comment.
|
|
{
|
|
const unsigned char *end_char = tail + 2;
|
|
if (*(tail + 1) == '-' &&
|
|
(*end_char == ' ' || *end_char == '\t' ||
|
|
is_line_break(end_char, new_line) || length == 2)) {
|
|
// Skip everything until the end of the line.
|
|
tail += 2;
|
|
while (tail < end && !is_line_break(tail, new_line)) tail++;
|
|
if (!have_content) head = tail;
|
|
}
|
|
break;
|
|
}
|
|
|
|
case '#': // MySQL single line comment.
|
|
while (tail < end && !is_line_break(tail, new_line)) tail++;
|
|
if (!have_content) head = tail;
|
|
break;
|
|
|
|
case '"':
|
|
case '\'':
|
|
case '`': {
|
|
have_content = true;
|
|
char quote = *tail++;
|
|
|
|
if (input_context_stack.empty() || input_context_stack.top() == "-") {
|
|
_again:
|
|
// Quoted string/id. Skip this in a local loop if is opening quote.
|
|
while (tail < end && *tail != quote) {
|
|
// Skip any escaped character too.
|
|
if (*tail == '\\') tail++;
|
|
tail++;
|
|
}
|
|
if (*tail == quote) {
|
|
if (*(tail + 1) == quote) {
|
|
tail += 2;
|
|
goto _again; // double quote in quote treated as escaped quote
|
|
}
|
|
tail++; // Skip trailing quote char to if one was there.
|
|
} else {
|
|
std::string q;
|
|
q.assign("e, 1);
|
|
input_context_stack.push(
|
|
q); // Sets multiline opening quote to continue processing
|
|
}
|
|
} else // Closing quote, clears the multiline flag
|
|
input_context_stack.pop();
|
|
|
|
break;
|
|
}
|
|
|
|
case 'd':
|
|
case 'D': {
|
|
have_content = true;
|
|
|
|
// Possible start of the keyword DELIMITER. Must be at the start of the
|
|
// text or a character,
|
|
// which is not part of a regular MySQL identifier (0-9, A-Z, a-z, _, $,
|
|
// \u0080-\uffff).
|
|
unsigned char previous =
|
|
tail > pointer_cast<const unsigned char *>(sql) ? *(tail - 1) : 0;
|
|
bool is_identifier_char =
|
|
previous >= 0x80 || (previous >= '0' && previous <= '9') ||
|
|
((previous | 0x20) >= 'a' && (previous | 0x20) <= 'z') ||
|
|
previous == '$' || previous == '_';
|
|
if (tail == pointer_cast<const unsigned char *>(sql) ||
|
|
!is_identifier_char) {
|
|
const unsigned char *run = tail + 1;
|
|
const unsigned char *kw = keyword + 1;
|
|
int count = 9;
|
|
while (count-- > 1 && (*run++ | 0x20) == *kw++)
|
|
;
|
|
if (count == 0 && *run == ' ') {
|
|
// Delimiter keyword found. Get the new delimiter (everything until
|
|
// the end of the line).
|
|
tail = run++;
|
|
while (run < end && !is_line_break(run, new_line)) run++;
|
|
|
|
delimiter =
|
|
std::string(pointer_cast<const char *>(tail), run - tail);
|
|
aux::trim(delimiter);
|
|
|
|
delimiter_head =
|
|
pointer_cast<const unsigned char *>(delimiter.c_str());
|
|
|
|
// Skip over the delimiter statement and any following line breaks.
|
|
while (is_line_break(run, new_line)) run++;
|
|
tail = run;
|
|
head = tail;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (*tail == *delimiter_head) {
|
|
// Found possible start of the delimiter. Check if it really is.
|
|
size_t count = delimiter.size();
|
|
if (count == 1) {
|
|
// Most common case. Trim the statement and check if it is not empty
|
|
// before adding the range.
|
|
head = skip_leading_whitespace(head, tail);
|
|
if (head < tail || (!input_context_stack.empty() &&
|
|
input_context_stack.top() == "-")) {
|
|
full_statement_count++;
|
|
|
|
if (!input_context_stack.empty()) input_context_stack.pop();
|
|
|
|
if (head < tail)
|
|
ranges.push_back(std::make_pair<size_t, size_t>(
|
|
head - pointer_cast<const unsigned char *>(sql), tail - head));
|
|
}
|
|
head = ++tail;
|
|
have_content = false;
|
|
} else {
|
|
const unsigned char *run = tail + 1;
|
|
const unsigned char *del = delimiter_head + 1;
|
|
while (count-- > 1 && (*run++ == *del++))
|
|
;
|
|
|
|
if (count == 0) {
|
|
// Multi char delimiter is complete. Tail still points to the start of
|
|
// the delimiter.
|
|
// Run points to the first character after the delimiter.
|
|
head = skip_leading_whitespace(head, tail);
|
|
if (head < tail || (!input_context_stack.empty() &&
|
|
input_context_stack.top() == "-")) {
|
|
full_statement_count++;
|
|
|
|
if (!input_context_stack.empty()) input_context_stack.pop();
|
|
|
|
if (head < tail)
|
|
ranges.push_back(std::make_pair<size_t, size_t>(
|
|
head - pointer_cast<const unsigned char *>(sql),
|
|
tail - head));
|
|
}
|
|
|
|
tail = run;
|
|
head = run;
|
|
have_content = false;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Multiline comments are ignored, everything else is not
|
|
if (*tail > ' ' &&
|
|
(input_context_stack.empty() || input_context_stack.top() != "/*"))
|
|
have_content = true;
|
|
tail++;
|
|
}
|
|
|
|
// Add remaining text to the range list but ignores it when it is a multiline
|
|
// comment
|
|
head = skip_leading_whitespace(head, tail);
|
|
if (head < tail &&
|
|
(input_context_stack.empty() || input_context_stack.top() != "/*")) {
|
|
ranges.push_back(std::make_pair<size_t, size_t>(
|
|
head - pointer_cast<const unsigned char *>(sql), tail - head));
|
|
|
|
// If not a multiline string then sets the flag to multiline statement (not
|
|
// terminated)
|
|
if (input_context_stack.empty()) input_context_stack.push("-");
|
|
}
|
|
|
|
return full_statement_count;
|
|
}
|
|
} // namespace splitter
|
|
} // namespace mysql
|
|
} // namespace shcore
|