diff --git a/.gitignore b/.gitignore index 870ca9cbdc136f4823abee3dce85671fc5ccd26c..392e7cb40eeadd3310ac497d89a6d66d4cfcab32 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ .vscode .idea CMakeUserPresets.json +/metrics/cmake-build-debug diff --git a/CMakeLists.txt b/CMakeLists.txt index 5054100d4d9df7e09087c90a11fdac68ac7ce761..b959d1188b64734fa3464a82ec2f6a7649e14c29 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,11 +5,19 @@ project(SourcedOut CXX) # find_package(antlr4-runtime REQUIRED) find_package(Boost 1.8.1 REQUIRED) # find_package(libpqxx REQUIRED) -find_package(GTest REQUIRED) + set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) message(STATUS ${Boost_LIBRARIES}) set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} "-lpthread -pthread") + +option(BUILD_TESTS "build tests") +#if(BUILD_TESTS) +enable_testing() +find_package(GTest REQUIRED) +#endif() + +add_subdirectory(metrics) + add_executable(${PROJECT_NAME} src/main.cpp) -#add_executable(${PROJECT_NAME} text-basic-metrics/tbm_main.cpp text-basic-metrics/tbm_main.cpp) строка для запуска моей части в text-basic-metrics -target_link_libraries(${PROJECT_NAME} ${Boost_LIBRARIES} ${antlr4-runtime_LIBRARIES} ${libpqxx_LIBRARIES} ${GTest_LIBRARIES}) \ No newline at end of file +target_link_libraries(${PROJECT_NAME} ${Boost_LIBRARIES} ${antlr4-runtime_LIBRARIES} ${libpqxx_LIBRARIES} ${GTest_LIBRARIES} MetricsLib) \ No newline at end of file diff --git a/metrics/CMakeLists.txt b/metrics/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..531f6f6bfdc5a9bc7564cc06c844806cc402cc43 --- /dev/null +++ b/metrics/CMakeLists.txt @@ -0,0 +1,28 @@ +project("MetricsLib") + +file(GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp) +file(GLOB INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include) + + +include_directories(${INCLUDE_DIRS}) +add_library(${PROJECT_NAME} ${SOURCES}) + +message("ANTLR4_LIB = ${ANTLR4_LIB}") +message("ANTLR4_LIB_INCLUDE_DIRS = ${ANTLR4_LIB_INCLUDE_DIRS}") 
+
+target_include_directories(${PROJECT_NAME} PUBLIC ${INCLUDE_DIRS})
+target_link_libraries(${PROJECT_NAME} ${Boost_LIBRARIES})
+
+
+set(METRICS_LIBRARY ${PROJECT_NAME})
+set(METRICS_LIBRARY ${METRICS_LIBRARY} PARENT_SCOPE)
+
+
+set(METRICS_lib_INCLUDE_DIRS ${INCLUDE_DIRS})
+set(METRICS_lib_INCLUDE_DIRS ${METRICS_lib_INCLUDE_DIRS} PARENT_SCOPE)
+
+
+enable_testing()
+#if(BUILD_TESTS)
+add_subdirectory(tests)
+#endif()
\ No newline at end of file
diff --git a/metrics/include/TextMetricsLib.h b/metrics/include/TextMetricsLib.h
new file mode 100644
index 0000000000000000000000000000000000000000..4db863f2a574c304d8717383634e2d9629ce59c9
--- /dev/null
+++ b/metrics/include/TextMetricsLib.h
@@ -0,0 +1,49 @@
+//
+// Created by march on 02.05.2023.
+//
+
+#ifndef SOURCEDOUT_DECLARATION_H
+#define SOURCEDOUT_DECLARATION_H
+
+#include <algorithm>
+#include <set>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <boost/tokenizer.hpp>
+
+// Similarity metric over two program texts.
+class ITextMetric {
+ public:
+  virtual ~ITextMetric() = default;
+  virtual void setData(std::string text1, std::string text2) = 0;
+  virtual double getMetric() = 0;
+};
+
+// Strips the comments from both texts and stores their token sequences.
+class PrepareDataTextMetric : public ITextMetric {
+ public:
+  void setData(std::string text1, std::string text2) override;
+
+ protected:
+  std::vector<std::string> tokens1;
+  std::vector<std::string> tokens2;
+
+ private:
+  static std::string deleteComments(const std::string& text);
+  static std::vector<std::string> tbmTokenizer(const std::string& text);
+};
+
+// 1 - (token Levenshtein distance / longer token count); 0 if either text has no tokens.
+class LevDistTextMetric : public PrepareDataTextMetric {
+ public:
+  double getMetric() override;
+};
+
+// Jaccard index of the two sets of distinct tokens; 0 if both texts have no tokens.
+class JaccardTextMetric : public PrepareDataTextMetric {
+ public:
+  double getMetric() override;
+};
+
+#endif  // SOURCEDOUT_DECLARATION_H
diff --git a/metrics/include/TokenMetricLib.h b/metrics/include/TokenMetricLib.h
new file mode 100644
index 0000000000000000000000000000000000000000..c333fe2abbb5c5b06801d4a19d77f5757dbb21f9
--- /dev/null
+++ b/metrics/include/TokenMetricLib.h
@@ -0,0 +1,47 @@
+//
+// Created by march on 04.05.2023.
+//
+
+#ifndef SOURCEDOUT_TOKENMETRICLIB_H
+#define SOURCEDOUT_TOKENMETRICLIB_H
+
+#include <algorithm>
+#include <iterator>
+#include <set>
+#include <tuple>
+#include <vector>
+
+// NOTE(review): the element type was not visible in the reviewed text; int matches the
+// integer literals used by the tests - confirm against the code that produces the tokens.
+
+// Similarity metric over two token sequences.
+class ITokenMetric {
+ public:
+  virtual ~ITokenMetric() = default;
+  virtual void setData(std::vector<int> tokens1, std::vector<int> tokens2) = 0;
+  virtual double getMetric() = 0;
+};
+
+// Stores the two token sequences for the concrete metrics.
+class PrepareDataTokenMetric : public ITokenMetric {
+ public:
+  void setData(std::vector<int> _tokens1, std::vector<int> _tokens2) override;
+
+ protected:
+  std::vector<int> tokens1;
+  std::vector<int> tokens2;
+};
+
+// 1 - (Levenshtein distance / longer sequence length); 0 if either sequence is empty.
+class LevDistTokenMetric : public PrepareDataTokenMetric {
+ public:
+  double getMetric() override;
+};
+
+// Jaccard index of the two sets of 3-token shingles; 0 if neither sequence yields one.
+class WShinglingTokenMetric : public PrepareDataTokenMetric {
+ public:
+  double getMetric() override;
+};
+
+#endif  // SOURCEDOUT_TOKENMETRICLIB_H
diff --git a/metrics/src/TextMetricImpl.cpp b/metrics/src/TextMetricImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..848c36bf6cfc1da24518fc279cb22275b9c89a26
--- /dev/null
+++ b/metrics/src/TextMetricImpl.cpp
@@ -0,0 +1,115 @@
+//
+// Created by march on 02.05.2023.
+//
+
+#include "TextMetricsLib.h"
+
+#include <cstddef>
+#include <iterator>
+#include <numeric>
+
+namespace {
+
+// Levenshtein distance between two token sequences, computed with two rolling rows.
+std::size_t levenshteinDistance(const std::vector<std::string>& a,
+                                const std::vector<std::string>& b) {
+    std::vector<std::size_t> prev(b.size() + 1);
+    std::vector<std::size_t> curr(b.size() + 1);
+    // Row 0: building b[0..j) from an empty sequence takes j insertions.
+    std::iota(prev.begin(), prev.end(), std::size_t{0});
+
+    for (std::size_t i = 1; i <= a.size(); i++) {
+        curr[0] = i;  // emptying a[0..i) takes i deletions
+        for (std::size_t j = 1; j <= b.size(); j++) {
+            const std::size_t substitution = prev[j - 1] + (a[i - 1] == b[j - 1] ? 0 : 1);
+            curr[j] = std::min({prev[j] + 1, curr[j - 1] + 1, substitution});
+        }
+        prev.swap(curr);
+    }
+    return prev[b.size()];
+}
+
+}  // namespace
+
+// Strips the comments from both programs and tokenizes what is left.
+void PrepareDataTextMetric::setData(std::string text1, std::string text2) {
+    tokens1 = tbmTokenizer(deleteComments(text1));
+    tokens2 = tbmTokenizer(deleteComments(text2));
+}
+
+// Returns text without // and /* */ comments. A removed comment still separates the code
+// around it, and consecutive line breaks are emitted as a single '\n'. String literals are
+// not recognised, so a comment marker inside one starts a comment too.
+std::string PrepareDataTextMetric::deleteComments(const std::string& text) {
+    std::string res;
+    res.reserve(text.size());
+
+    bool s_comm = false;  // inside a // comment
+    bool m_comm = false;  // inside a /* */ comment
+
+    for (std::size_t i = 0; i < text.size(); i++) {
+        const char c = text[i];
+        const char next = (i + 1 < text.size()) ? text[i + 1] : '\0';
+
+        if (m_comm) {
+            if (c == '*' && next == '/') {
+                m_comm = false;
+                i++;
+                res += ' ';  // keep "a/* x */b" from becoming "ab"
+            }
+        } else if (c == '\n' || c == '\0') {
+            // A line end closes a // comment and is kept as one separator.
+            s_comm = false;
+            if (!res.empty() && res.back() != '\n')
+                res += '\n';
+        } else if (s_comm) {
+            continue;
+        } else if (c == '/' && next == '/') {
+            s_comm = true;
+            i++;
+        } else if (c == '/' && next == '*') {
+            m_comm = true;
+            i++;
+        } else {
+            res += c;
+        }
+    }
+    return res;
+}
+
+// Splits comment-free text into tokens on whitespace, brackets, ';', ',' and quotes.
+std::vector<std::string> PrepareDataTextMetric::tbmTokenizer(const std::string& text) {
+    // No "\0" inside the literal: the C string would end there and silently drop every
+    // delimiter after it. Line breaks are delimiters so that tokens cannot span lines.
+    boost::char_separator<char> sep(" \t\r\n{}();,\"'");
+    boost::tokenizer<boost::char_separator<char> > tokens(text, sep);
+
+    // char_separator drops empty tokens by default, so nothing is left to filter out.
+    return std::vector<std::string>(tokens.begin(), tokens.end());
+}
+
+// Similarity in [0, 1]: 1 - distance / longer token count; 0 if either text has no tokens.
+double LevDistTextMetric::getMetric() {
+    if (tokens1.empty() || tokens2.empty())
+        return 0;
+
+    const std::size_t longest = std::max(tokens1.size(), tokens2.size());
+    const std::size_t distance = levenshteinDistance(tokens1, tokens2);
+    return 1.0 - static_cast<double>(distance) / static_cast<double>(longest);
+}
+
+// Jaccard index |intersection| / |union| of the distinct tokens; 0 if both texts are empty.
+double JaccardTextMetric::getMetric() {
+    const std::set<std::string> s1(tokens1.begin(), tokens1.end());
+    const std::set<std::string> s2(tokens2.begin(), tokens2.end());
+
+    std::vector<std::string> common;
+    std::set_intersection(s1.begin(), s1.end(), s2.begin(), s2.end(),
+                          std::back_inserter(common));
+
+    // |union| = |s1| + |s2| - |intersection|, so the union itself is never built.
+    const std::size_t union_size = s1.size() + s2.size() - common.size();
+    if (union_size == 0)
+        return 0;
+    return static_cast<double>(common.size()) / static_cast<double>(union_size);
+}
diff --git a/metrics/src/TokenMetricImpl.cpp b/metrics/src/TokenMetricImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..70cba714e4d75cdaad0dc1fb56bcad7e83e2ce7f
--- /dev/null
+++ b/metrics/src/TokenMetricImpl.cpp
@@ -0,0 +1,74 @@
+//
+// Created by march on 04.05.2023.
+//
+
+#include "TokenMetricLib.h"
+
+#include <cstddef>
+#include <numeric>
+#include <utility>
+
+namespace {
+
+using Shingle = std::tuple<int, int, int>;
+
+// Levenshtein distance between two token sequences, computed with two rolling rows.
+std::size_t levenshteinDistance(const std::vector<int>& a, const std::vector<int>& b) {
+    std::vector<std::size_t> prev(b.size() + 1);
+    std::vector<std::size_t> curr(b.size() + 1);
+    // Row 0: building b[0..j) from an empty sequence takes j insertions.
+    std::iota(prev.begin(), prev.end(), std::size_t{0});
+
+    for (std::size_t i = 1; i <= a.size(); i++) {
+        curr[0] = i;  // emptying a[0..i) takes i deletions
+        for (std::size_t j = 1; j <= b.size(); j++) {
+            const std::size_t substitution = prev[j - 1] + (a[i - 1] == b[j - 1] ? 0 : 1);
+            curr[j] = std::min({prev[j] + 1, curr[j - 1] + 1, substitution});
+        }
+        prev.swap(curr);
+    }
+    return prev[b.size()];
+}
+
+// Distinct runs of three consecutive tokens; empty for fewer than three tokens.
+std::set<Shingle> shingles(const std::vector<int>& tokens) {
+    std::set<Shingle> result;
+    // i + 2 is the last index read; this form cannot wrap around the way size() - 3 does.
+    for (std::size_t i = 0; i + 2 < tokens.size(); i++)
+        result.emplace(tokens[i], tokens[i + 1], tokens[i + 2]);
+    return result;
+}
+
+}  // namespace
+
+// Similarity in [0, 1]: 1 - distance / longer sequence length; 0 if either one is empty.
+double LevDistTokenMetric::getMetric() {
+    if (tokens1.empty() || tokens2.empty())
+        return 0;
+
+    const std::size_t longest = std::max(tokens1.size(), tokens2.size());
+    const std::size_t distance = levenshteinDistance(tokens1, tokens2);
+    return 1.0 - static_cast<double>(distance) / static_cast<double>(longest);
+}
+
+// Jaccard index over the 3-token shingles; 0 if neither sequence yields a shingle.
+double WShinglingTokenMetric::getMetric() {
+    const std::set<Shingle> s1 = shingles(tokens1);
+    const std::set<Shingle> s2 = shingles(tokens2);
+
+    std::vector<Shingle> common;
+    std::set_intersection(s1.begin(), s1.end(), s2.begin(), s2.end(),
+                          std::back_inserter(common));
+
+    // |union| = |s1| + |s2| - |intersection|, so the union itself is never built.
+    const std::size_t union_size = s1.size() + s2.size() - common.size();
+    if (union_size == 0)
+        return 0;
+    return static_cast<double>(common.size()) / static_cast<double>(union_size);
+}
+
+// Takes ownership of both sequences.
+void PrepareDataTokenMetric::setData(std::vector<int> _tokens1, std::vector<int> _tokens2) {
+    tokens1 = std::move(_tokens1);
+    tokens2 = std::move(_tokens2);
+}
diff --git a/text-basic-metrics/code1.txt b/metrics/testProgs/code1.txt
similarity index 100%
rename from text-basic-metrics/code1.txt
rename to metrics/testProgs/code1.txt
diff --git a/text-basic-metrics/code2.txt b/metrics/testProgs/code2.txt
similarity index 99%
rename from text-basic-metrics/code2.txt
rename to metrics/testProgs/code2.txt
index 2115b76e6866f8af7e14dbd7de1ed6fe3a2c9ec2..76fc711ff01e4d281fde4082e46e2500b9bb4e5e 100644
--- a/text-basic-metrics/code2.txt
+++ b/metrics/testProgs/code2.txt
@@ -1,5 +1,6 @@
 // однострочный комментарий
 // еще один
+
 // вау еще один
 
 #include
diff --git a/metrics/tests/CMakeLists.txt b/metrics/tests/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0b5a55d4c1cf6aeaa1d772a77018a75d11bd41c3
--- /dev/null
+++ b/metrics/tests/CMakeLists.txt
@@ -0,0 +1,8 @@
+file(GLOB SOURCES src/*.cpp)
+
+add_executable(metrics_test ${SOURCES})
+
+target_link_libraries(metrics_test MetricsLib GTest::gtest_main GTest::gmock)
+
+# The text-metric tests open src/test-codes/code1.txt relative to their working directory.
+add_test(NAME metrics_test COMMAND metrics_test WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
\ No newline at end of file
diff --git a/metrics/tests/src/main.cpp b/metrics/tests/src/main.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..70cfa61104ff822a57412778fd4a5de924e2d23c
--- /dev/null
+++ 
b/metrics/tests/src/main.cpp
@@ -0,0 +1,13 @@
+//
+// Created by march on 16.05.2023.
+//
+
+#include <gtest/gtest.h>
+#include <gmock/gmock.h>
+
+int main(int argc, char **argv)
+{
+    ::testing::InitGoogleTest(&argc, argv);
+
+    return RUN_ALL_TESTS();
+}
\ No newline at end of file
diff --git a/metrics/tests/src/test-codes/code1.txt b/metrics/tests/src/test-codes/code1.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5fa95b8f8462b43d7b70943fc4b1f5d124dec881
--- /dev/null
+++ b/metrics/tests/src/test-codes/code1.txt
@@ -0,0 +1,29 @@
+// однострочный комментарий
+// еще один
+// вау еще один
+
+#include
+#include
+#include
+
+using namespace std;
+
+/* многострочный комм
+ * // внутри него однострочный
+ *
+ */
+
+
+int main() {
+    stringstream ss;
+    string res;
+    // ещё в код напихаю комментов
+    ss << "a bwfw ce ";
+    while(getline(ss, res, ' ')){ //комментарий после строки с кодом
+    /*
+     * летс гоу
+     * худшее место для многострочного коммента
+     */
+        cout << res << endl; /* многострочный однострочно */
+    }
+}
diff --git a/metrics/tests/src/text_metrics_tests.cpp b/metrics/tests/src/text_metrics_tests.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..aae644ddcb51afe1b1bd2e124f56dd5ff02d2513
--- /dev/null
+++ b/metrics/tests/src/text_metrics_tests.cpp
@@ -0,0 +1,88 @@
+//
+// Created by march on 04.05.2023.
+//
+
+#include <gtest/gtest.h>
+#include <gmock/gmock.h>
+
+#include <fstream>
+#include <iterator>
+#include <memory>
+#include <string>
+
+#include "TextMetricsLib.h"
+
+namespace {
+
+// Sample program, looked up relative to the directory the test binary is started from.
+const char kSampleProgram[] = "src/test-codes/code1.txt";
+
+// Reads a whole file; opened tells the caller whether the file could be opened at all.
+std::string readFile(const char* path, bool& opened) {
+    std::ifstream fin(path);
+    opened = fin.is_open();
+    return std::string(std::istreambuf_iterator<char>(fin), std::istreambuf_iterator<char>());
+}
+
+}  // namespace
+
+class LevDistTextMetricTest : public ::testing::Test {
+protected:
+    std::unique_ptr<LevDistTextMetric> levDistTextMetric;
+    void SetUp() override {
+        levDistTextMetric = std::make_unique<LevDistTextMetric>();
+    }
+};
+
+class JaccardTextMetricTest : public ::testing::Test {
+protected:
+    std::unique_ptr<JaccardTextMetric> jaccardTextMetric;
+    void SetUp() override {
+        jaccardTextMetric = std::make_unique<JaccardTextMetric>();
+    }
+};
+
+TEST_F(LevDistTextMetricTest, check_eq_progs) {
+    bool opened = false;
+    const std::string text1 = readFile(kSampleProgram, opened);
+    // Without this an unreadable file gives two empty texts and a misleading metric failure.
+    ASSERT_TRUE(opened) << "cannot open " << kSampleProgram;
+
+    levDistTextMetric->setData(text1, text1);
+
+    EXPECT_DOUBLE_EQ(levDistTextMetric->getMetric(), 1.0);
+}
+
+TEST_F(LevDistTextMetricTest, check_absolutely_not_eq_progs) {
+    levDistTextMetric->setData("a b c", "d e f g");
+
+    EXPECT_LT(levDistTextMetric->getMetric(), 0.5);
+}
+
+TEST_F(LevDistTextMetricTest, test_with_empty_prog) {
+    levDistTextMetric->setData("a b c", "");
+
+    EXPECT_DOUBLE_EQ(levDistTextMetric->getMetric(), 0.0);
+}
+
+TEST_F(JaccardTextMetricTest, check_eq_progs) {
+    bool opened = false;
+    const std::string text1 = readFile(kSampleProgram, opened);
+    ASSERT_TRUE(opened) << "cannot open " << kSampleProgram;
+
+    jaccardTextMetric->setData(text1, text1);
+
+    EXPECT_DOUBLE_EQ(jaccardTextMetric->getMetric(), 1.0);
+}
+
+TEST_F(JaccardTextMetricTest, check_absolutely_not_eq_progs) {
+    jaccardTextMetric->setData("a b c", "d e f g");
+
+    EXPECT_DOUBLE_EQ(jaccardTextMetric->getMetric(), 0.0);
+}
+
+TEST_F(JaccardTextMetricTest, test_with_empty_prog) {
+    jaccardTextMetric->setData("a b c", "");
+
+    EXPECT_DOUBLE_EQ(jaccardTextMetric->getMetric(), 0.0);
+}
diff --git a/metrics/tests/src/token_metrics_tests.cpp b/metrics/tests/src/token_metrics_tests.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bbc278d18db24569c66b40283b5cd432114ad2cd
--- /dev/null
+++ b/metrics/tests/src/token_metrics_tests.cpp
@@ -0,0 +1,86 @@
+//
+// Created by march on 16.05.2023.
+//
+
+#include <gtest/gtest.h>
+#include <gmock/gmock.h>
+
+#include <memory>
+#include <vector>
+
+#include "TokenMetricLib.h"
+
+class LevDistTokenMetricTest : public ::testing::Test {
+protected:
+    std::unique_ptr<LevDistTokenMetric> levDistTokenMetric;
+    void SetUp() override {
+        levDistTokenMetric = std::make_unique<LevDistTokenMetric>();
+    }
+};
+
+class WShinglingTokenMetricTest : public ::testing::Test {
+protected:
+    std::unique_ptr<WShinglingTokenMetric> wShinglingTokenMetric;
+    void SetUp() override {
+        wShinglingTokenMetric = std::make_unique<WShinglingTokenMetric>();
+    }
+};
+
+TEST_F(LevDistTokenMetricTest, check_eq_progs) {
+    const std::vector<int> tokens1 = {1, 2, 3};
+
+    levDistTokenMetric->setData(tokens1, tokens1);
+
+    EXPECT_DOUBLE_EQ(levDistTokenMetric->getMetric(), 1.0);
+}
+
+TEST_F(LevDistTokenMetricTest, check_absolutely_not_eq_progs) {
+    const std::vector<int> tokens1 = {1, 2, 3};
+    const std::vector<int> tokens2 = {3, 4, 5, 6};
+
+    levDistTokenMetric->setData(tokens1, tokens2);
+
+    EXPECT_LT(levDistTokenMetric->getMetric(), 0.5);
+}
+
+TEST_F(LevDistTokenMetricTest, test_with_empty_prog) {
+    const std::vector<int> tokens1 = {1, 2, 3};
+    const std::vector<int> tokens2 = {};
+
+    levDistTokenMetric->setData(tokens1, tokens2);
+
+    EXPECT_DOUBLE_EQ(levDistTokenMetric->getMetric(), 0.0);
+}
+
+TEST_F(WShinglingTokenMetricTest, check_eq_progs) {
+    const std::vector<int> tokens1 = {1, 2, 3, 4, 5, 6};
+    const std::vector<int> tokens2 = {1, 2, 3, 4, 5, 6};
+
+    // Two separate but equal sequences: passing tokens1 twice would leave tokens2 unused.
+    wShinglingTokenMetric->setData(tokens1, tokens2);
+
+    EXPECT_DOUBLE_EQ(wShinglingTokenMetric->getMetric(), 1.0);
+}
+
+TEST_F(WShinglingTokenMetricTest, check_absolutely_not_eq_progs) {
+    const std::vector<int> tokens1 = {1, 2, 3};
+    const std::vector<int> tokens2 = {4, 5, 6, 1};
+
+    wShinglingTokenMetric->setData(tokens1, tokens2);
+
+    EXPECT_DOUBLE_EQ(wShinglingTokenMetric->getMetric(), 0.0);
+}
+
+TEST_F(WShinglingTokenMetricTest, test_with_empty_prog) {
+    const std::vector<int> tokens1 = {1, 2, 3};
+    const std::vector<int> tokens2 = {};
+
+    wShinglingTokenMetric->setData(tokens1, tokens2);
+
+    EXPECT_DOUBLE_EQ(wShinglingTokenMetric->getMetric(), 0.0);
+} + + + + + diff --git a/src/main.cpp b/src/main.cpp index cbd2bd5cf24d419d22f50b5d29560a8591df4ecf..2eade44ba266eebca7e330a2c219da206e1475b7 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,264 +1,30 @@ - -#include -#include -#include -#include -#include -#include #include -#include -#include -#include - -namespace beast = boost::beast; // from -namespace http = beast::http; // from -namespace net = boost::asio; // from -using tcp = boost::asio::ip::tcp; // from - -//------------------------------------------------------------------------------ - -// Return a reasonable mime type based on the extension of a file. -beast::string_view mime_type(beast::string_view path) { - using beast::iequals; - auto const ext = [&path] { - auto const pos = path.rfind("."); - if (pos == beast::string_view::npos) return beast::string_view{}; - return path.substr(pos); - }(); - if (iequals(ext, ".htm")) return "text/html"; - if (iequals(ext, ".html")) return "text/html"; - if (iequals(ext, ".php")) return "text/html"; - if (iequals(ext, ".css")) return "text/css"; - if (iequals(ext, ".txt")) return "text/plain"; - if (iequals(ext, ".js")) return "application/javascript"; - if (iequals(ext, ".json")) return "application/json"; - if (iequals(ext, ".xml")) return "application/xml"; - if (iequals(ext, ".swf")) return "application/x-shockwave-flash"; - if (iequals(ext, ".flv")) return "video/x-flv"; - if (iequals(ext, ".png")) return "image/png"; - if (iequals(ext, ".jpe")) return "image/jpeg"; - if (iequals(ext, ".jpeg")) return "image/jpeg"; - if (iequals(ext, ".jpg")) return "image/jpeg"; - if (iequals(ext, ".gif")) return "image/gif"; - if (iequals(ext, ".bmp")) return "image/bmp"; - if (iequals(ext, ".ico")) return "image/vnd.microsoft.icon"; - if (iequals(ext, ".tiff")) return "image/tiff"; - if (iequals(ext, ".tif")) return "image/tiff"; - if (iequals(ext, ".svg")) return "image/svg+xml"; - if (iequals(ext, ".svgz")) return "image/svg+xml"; - return "application/text"; -} - -// 
Append an HTTP rel-path to a local filesystem path. -// The returned path is normalized for the platform. -std::string path_cat(beast::string_view base, beast::string_view path) { - if (base.empty()) return std::string(path); - std::string result(base); -#ifdef BOOST_MSVC - char constexpr path_separator = '\\'; - if (result.back() == path_separator) result.resize(result.size() - 1); - result.append(path.data(), path.size()); - for (auto& c : result) - if (c == '/') c = path_separator; -#else - char constexpr path_separator = '/'; - if (result.back() == path_separator) result.resize(result.size() - 1); - result.append(path.data(), path.size()); -#endif - return result; -} - -// This function produces an HTTP response for the given -// request. The type of the response object depends on the -// contents of the request, so the interface requires the -// caller to pass a generic lambda for receiving the response. -template -void handle_request(beast::string_view doc_root, - http::request>&& req, - Send&& send) { - // Returns a bad request response - auto const bad_request = [&req](beast::string_view why) { - http::response res{http::status::bad_request, - req.version()}; - res.set(http::field::server, BOOST_BEAST_VERSION_STRING); - res.set(http::field::content_type, "text/html"); - res.keep_alive(req.keep_alive()); - res.body() = std::string(why); - res.prepare_payload(); - return res; - }; - - // Returns a not found response - auto const not_found = [&req](beast::string_view target) { - http::response res{http::status::not_found, - req.version()}; - res.set(http::field::server, BOOST_BEAST_VERSION_STRING); - res.set(http::field::content_type, "text/html"); - res.keep_alive(req.keep_alive()); - res.body() = "The resource '" + std::string(target) + "' was not found."; - res.prepare_payload(); - return res; - }; - - // Returns a server error response - auto const server_error = [&req](beast::string_view what) { - http::response res{http::status::internal_server_error, - 
req.version()}; - res.set(http::field::server, BOOST_BEAST_VERSION_STRING); - res.set(http::field::content_type, "text/html"); - res.keep_alive(req.keep_alive()); - res.body() = "An error occurred: '" + std::string(what) + "'"; - res.prepare_payload(); - return res; - }; - - // Make sure we can handle the method - if (req.method() != http::verb::get && req.method() != http::verb::head) - return send(bad_request("Unknown HTTP-method")); - - // Request path must be absolute and not contain "..". - if (req.target().empty() || req.target()[0] != '/' || - req.target().find("..") != beast::string_view::npos) - return send(bad_request("Illegal request-target")); - - // Build the path to the requested file - std::string path = path_cat(doc_root, req.target()); - if (req.target().back() == '/') path.append("index.html"); - - // Attempt to open the file - beast::error_code ec; - http::file_body::value_type body; - body.open(path.c_str(), beast::file_mode::scan, ec); - - // Handle the case where the file doesn't exist - if (ec == beast::errc::no_such_file_or_directory) - return send(not_found(req.target())); - - // Handle an unknown error - if (ec) return send(server_error(ec.message())); - - // Cache the size since we need it after the move - auto const size = body.size(); - - // Respond to HEAD request - if (req.method() == http::verb::head) { - http::response res{http::status::ok, req.version()}; - res.set(http::field::server, BOOST_BEAST_VERSION_STRING); - res.set(http::field::content_type, mime_type(path)); - res.content_length(size); - res.keep_alive(req.keep_alive()); - return send(std::move(res)); - } - - // Respond to GET request - http::response res{ - std::piecewise_construct, std::make_tuple(std::move(body)), - std::make_tuple(http::status::ok, req.version())}; - res.set(http::field::server, BOOST_BEAST_VERSION_STRING); - res.set(http::field::content_type, mime_type(path)); - res.content_length(size); - res.keep_alive(req.keep_alive()); - return 
send(std::move(res)); -} - -//------------------------------------------------------------------------------ - -// Report a failure -void fail(beast::error_code ec, char const* what) { - std::cerr << what << ": " << ec.message() << "\n"; -} - -// This is the C++11 equivalent of a generic lambda. -// The function object is used to send an HTTP message. -template -struct send_lambda { - Stream& stream_; - bool& close_; - beast::error_code& ec_; - - explicit send_lambda(Stream& stream, bool& close, beast::error_code& ec) - : stream_(stream), close_(close), ec_(ec) {} - - template - void operator()(http::message&& msg) const { - // Determine if we should close the connection after - close_ = msg.need_eof(); - - // We need the serializer here because the serializer requires - // a non-const file_body, and the message oriented version of - // http::write only works with const messages. - http::serializer sr{msg}; - http::write(stream_, sr, ec_); - } -}; - -// Handles an HTTP server connection -void do_session(tcp::socket& socket, - std::shared_ptr const& doc_root) { - bool close = false; - beast::error_code ec; - - // This buffer is required to persist across reads - beast::flat_buffer buffer; - - // This lambda is used to send messages - send_lambda lambda{socket, close, ec}; - - for (;;) { - // Read a request - http::request req; - http::read(socket, buffer, req, ec); - if (ec == http::error::end_of_stream) break; - if (ec) return fail(ec, "read"); - - // Send the response - handle_request(*doc_root, std::move(req), lambda); - if (ec) return fail(ec, "write"); - if (close) { - // This means we should close the connection, usually because - // the response indicated the "Connection: close" semantic. 
- break; - } - } +#include - // Send a TCP shutdown - socket.shutdown(tcp::socket::shutdown_send, ec); +#include "TextMetricsLib.h" - // At this point the connection is closed gracefully -} +int main(){ + std::ifstream fin1; + fin1.open("metrics/testProgs/code1.txt"); + assert(fin1.is_open()); -//------------------------------------------------------------------------------ + std::ifstream fin2; + fin2.open("metrics/testProgs/code2.txt"); + assert(fin2.is_open()); -int main(int argc, char* argv[]) { - try { - // Check command line arguments. - if (argc != 4) { - std::cerr << "Usage: http-server-sync
\n" - << "Example:\n" - << " http-server-sync 0.0.0.0 8080 .\n"; - return EXIT_FAILURE; - } - auto const address = net::ip::make_address(argv[1]); - auto const port = static_cast(std::atoi(argv[2])); - auto const doc_root = std::make_shared(argv[3]); + std::string text1( (std::istreambuf_iterator(fin1) ), + (std::istreambuf_iterator() ) ); - // The io_context is required for all I/O - net::io_context ioc{1}; + std::string text2( (std::istreambuf_iterator(fin2) ), + (std::istreambuf_iterator() ) ); + fin1.close(); + fin2.close(); - // The acceptor receives incoming connections - tcp::acceptor acceptor{ioc, {address, port}}; - for (;;) { - // This will receive the new connection - tcp::socket socket{ioc}; + LevDistTextMetric livDistTextMetric; + JaccardTextMetric jaccardTextMetric; - // Block until we get a connection - acceptor.accept(socket); + livDistTextMetric.setData(text1, text2); + jaccardTextMetric.setData(text1, text2); - // Launch the session, transferring ownership of the socket - std::thread{std::bind(&do_session, std::move(socket), doc_root)}.detach(); - } - } catch (const std::exception& e) { - std::cerr << "Error: " << e.what() << std::endl; - return EXIT_FAILURE; - } -} + std::cout << livDistTextMetric.getMetric() << std::endl << jaccardTextMetric.getMetric(); +} \ No newline at end of file diff --git a/text-basic-metrics/tbm_main.cpp b/text-basic-metrics/tbm_main.cpp deleted file mode 100644 index fdd4253768b6369af5fe434526e26cbb9fc30bcd..0000000000000000000000000000000000000000 --- a/text-basic-metrics/tbm_main.cpp +++ /dev/null @@ -1,153 +0,0 @@ -// -// Created by march on 21.04.2023. 
-// - -#include -#include -#include -#include -#include -#include -#include - -#include - - -std::string deleteComms(const std::string& text){ - std::string modif; - std::string res; - - std::stringstream ss; - std::string line; - - ss << text; - - while(getline(ss, line)){ - line.pop_back(); - line.push_back('\0'); - modif += line; - } - - bool s_comm = false; - bool m_comm = false; - - for (int i = 0; i < modif.size(); i++){ - if (s_comm && modif[i] == '\0') - s_comm = false; - else if (m_comm && modif[i] == '*' && modif[i + 1] == '/') - m_comm = false, i++; - else if (s_comm || m_comm) - continue; - else if (modif[i] == '/' && modif[i+1] == '/') - s_comm = true, i++; - else if (modif[i] == '/' && modif[i+1] == '*') - m_comm = true, i++; - - else if (modif[i] != '\0') - res += modif[i]; - else{ - res += '\n'; - } - } - return res; -} - -std::vector tbm_tokenizer(const std::string &text){ - boost::char_separator sep(" {}();,\"\0\'"); - std::vector res; - boost::tokenizer < boost::char_separator > tokens(text, sep); - - for (const std::string &s: tokens) { - if (!s.empty() && s[0] != '\n' && s[0] != '\0'){ - res.push_back(s); - } - } - return res; -} - -// % = intersection(A, B) / union(A, B) -double Jaccard_metric(const std::vector & tokens1, const std::vector & tokens2){ - std::set s1; - std::set s2; - - for (auto &i : tokens1) s1.insert(i); - for (auto &i : tokens2) s2.insert(i); - - - std::set intersect_sets; - set_intersection(s1.begin(), s1.end(), s2.begin(), s2.end(), - std::inserter(intersect_sets, intersect_sets.begin())); - - std::set union_sets; - set_union(s1.begin(), s1.end(), s2.begin(), s2.end(), - std::inserter(union_sets, union_sets.begin())); - - std::cout << intersect_sets.size() << " " << union_sets.size() << std::endl; - - return static_cast (intersect_sets.size()) / static_cast (union_sets.size()); -} - -double Livenstain_dist(std::vector tokens1, std::vector tokens2){ - unsigned long n = tokens1.size(); - unsigned long m = tokens2.size(); - 
int x, y, z; - - std::vector > lev (n, std::vector (m, 0)); - - for (int i = 0; i < n; i++){ - for (int j = 0; j < m; j++){ - if (std::min(i, j) == 0){ - lev[i][j] = std::max(i, j); - } - else{ - x = lev[i-1][j]; - y = lev[i][j-1]; - z = lev[i-1][j-1]; - lev[i][j] = std::min(x, std::min(y, z)); - if (tokens1[i] != tokens2[j]){ - lev[i][j]++; - } - } - } - } - - return lev[n-1][m-1]; -} - -std::pair textCompare(std::istream& fin1, std::istream& fin2){ - std::string line; - - std::string text1( (std::istreambuf_iterator(fin1) ), - (std::istreambuf_iterator() ) ); - - std::string text2( (std::istreambuf_iterator(fin2) ), - (std::istreambuf_iterator() ) ); - - std::string non_comm_text1 = deleteComms(text1); - std::string non_comm_text2 = deleteComms(text2); - - std::vector tokens1 = tbm_tokenizer(non_comm_text1); - std::vector tokens2 = tbm_tokenizer(non_comm_text2); - - double res1 = Jaccard_metric(tokens1, tokens2); - double res2 = 1 - Livenstain_dist(tokens1, tokens2) / std::max(tokens1.size(), tokens2.size()); - - return {res1, res2}; -} - -int main(){ - - std::ifstream fin1; - fin1.open("text-basic-metrics/code1.txt"); - assert(fin1.is_open()); - - std::ifstream fin2; - fin2.open("text-basic-metrics/code2.txt"); - assert(fin2.is_open()); - - std::pair metrics_res = textCompare(fin1, fin2); - - std::cout << "Jaccard metric "<< metrics_res.first << "\nLivenstein distance: " << metrics_res.second; - fin1.close(); - fin2.close(); -} \ No newline at end of file