diff --git a/metrics/CMakeLists.txt b/metrics/CMakeLists.txt index 83bc97d7022427064d0696fa61c04caf788f52d6..531f6f6bfdc5a9bc7564cc06c844806cc402cc43 100644 --- a/metrics/CMakeLists.txt +++ b/metrics/CMakeLists.txt @@ -1,13 +1,28 @@ -add_library(MetricsLib source/TextMetricImpl.cpp source/TokenMetricImpl.cpp) +project("MetricsLib") -target_include_directories(MetricsLib PUBLIC metrics_headers) -target_link_libraries(MetricsLib PUBLIC ${Boost_LIBRARIES}) +file(GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp) +file(GLOB INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include) -add_custom_target( - ${PROJECT_NAME}_COVERAGE - COMMAND gcovr ${CMAKE_CURRENT_BINARY_DIR} -r ${CMAKE_CURRENT_SOURCE_DIR} -) +include_directories(${INCLUDE_DIRS}) +add_library(${PROJECT_NAME} ${SOURCES}) + +message("ANTLR4_LIB = ${ANTLR4_LIB}") +message("ANTLR4_LIB_INCLUDE_DIRS = ${ANTLR4_LIB_INCLUDE_DIRS}") + +target_include_directories(${PROJECT_NAME} PUBLIC ${INCLUDE_DIRS}) +target_link_libraries(${PROJECT_NAME} ${Boost_LIBRARIES}) + + +set(METRICS_LIBRARY ${PROJECT_NAME}) +set(METRICS_LIBRARY ${METRICS_LIBRARY} PARENT_SCOPE) + + +set(METRICS_lib_INCLUDE_DIRS ${INCLUDE_DIRS}) +set(METRICS_lib_INCLUDE_DIRS ${METRICS_lib_INCLUDE_DIRS} PARENT_SCOPE) + + +enable_testing() #if(BUILD_TESTS) add_subdirectory(tests) #endif() \ No newline at end of file diff --git a/metrics/include/TextMetricsLib.h b/metrics/include/TextMetricsLib.h new file mode 100644 index 0000000000000000000000000000000000000000..4db863f2a574c304d8717383634e2d9629ce59c9 --- /dev/null +++ b/metrics/include/TextMetricsLib.h @@ -0,0 +1,45 @@ +// +// Created by march on 02.05.2023. +// + +#ifndef SOURCEDOUT_DECLARATION_H +#define SOURCEDOUT_DECLARATION_H + +#include +#include +#include +#include +#include +#include + +class ITextMetric { + public: + virtual ~ITextMetric() = default; + virtual void setData(std::string text1, std::string text2) = 0; + virtual double getMetric() = 0; +}; + +class PrepareDataTextMetric : public ITextMetric { + public: + void setData(std::string text1, std::string text2) override; + + protected: + std::vector tokens1; + std::vector tokens2; + + private: + static std::string deleteComments(const std::string& text); + static std::vector tbmTokenizer(const std::string& text); +}; + +class LevDistTextMetric : public PrepareDataTextMetric { + public: + double getMetric() override; +}; + +class JaccardTextMetric : public PrepareDataTextMetric { + public: + double getMetric() override; +}; + +#endif // SOURCEDOUT_DECLARATION_H diff --git a/metrics/metrics_headers/TokenMetricLib.h b/metrics/include/TokenMetricLib.h similarity index 53% rename from metrics/metrics_headers/TokenMetricLib.h rename to metrics/include/TokenMetricLib.h index 82c238d88d6d31b343b68cb950e7bf573b9bd6d4..c333fe2abbb5c5b06801d4a19d77f5757dbb21f9 100644 --- a/metrics/metrics_headers/TokenMetricLib.h +++ b/metrics/include/TokenMetricLib.h @@ -15,29 +15,28 @@ class ITokenMetric{ - virtual void countMetric() = 0; - virtual void setData(std::vector tokens1, std::vector tokens2) = 0; + public: + virtual ~ITokenMetric() = default; + virtual void setData(std::vector tokens1, std::vector tokens2) = 0; virtual double getMetric() = 0; }; class PrepareDataTokenMetric : public ITokenMetric{ public: - void setData(std::vector _tokens1, std::vector _tokens2) override; - double getMetric() override; + void setData(std::vector _tokens1, std::vector _tokens2) override; protected: - std::vector tokens1; - std::vector tokens2; - double metric_res{}; + std::vector tokens1; + std::vector tokens2; }; -class LivDistTokenMetric : public PrepareDataTokenMetric{ +class LevDistTokenMetric : public PrepareDataTokenMetric{ public: - void countMetric() override; + double getMetric() override; }; class WShinglingTokenMetric : public PrepareDataTokenMetric{ public: - void countMetric() override; + double getMetric() override; }; #endif //SOURCEDOUT_TOKENMETRICLIB_H diff --git a/metrics/metrics_headers/TextMetricsLib.h b/metrics/metrics_headers/TextMetricsLib.h deleted file mode 100644 index 574a9642a9cca456aac4a3313651d1a9d4c621bf..0000000000000000000000000000000000000000 --- a/metrics/metrics_headers/TextMetricsLib.h +++ /dev/null @@ -1,43 +0,0 @@ -// -// Created by march on 02.05.2023. -// - -#ifndef SOURCEDOUT_DECLARATION_H -#define SOURCEDOUT_DECLARATION_H - -#include -#include -#include -#include -#include - -#include - -class ITextMetric{ - virtual void setData(std::string text1, std::string text2) = 0; - virtual double getMetric() = 0; -}; - -class PrepareDataTextMetric : public ITextMetric{ -public: - void setData(std::string text1, std::string text2) override; -protected: - std::vector tokens1; - std::vector tokens2; -private: - static std::string deleteComments(const std::string& text); - static std::vector tbmTokenizer(const std::string &text); -}; - -class LevDistTextMetric : public PrepareDataTextMetric{ -public: - double getMetric() override; -}; - -class JaccardTextMetric : public PrepareDataTextMetric{ -public: - double getMetric() override; -}; - - -#endif //SOURCEDOUT_DECLARATION_H diff --git a/metrics/source/TokenMetricImpl.cpp b/metrics/source/TokenMetricImpl.cpp deleted file mode 100644 index 2695234235c5fb6458212fdfe6ca3650acc9a10b..0000000000000000000000000000000000000000 --- a/metrics/source/TokenMetricImpl.cpp +++ /dev/null @@ -1,15 +0,0 @@ -// -// Created by march on 04.05.2023. -// - -#include "TokenMetricLib.h" - -void PrepareDataTokenMetric::setData(std::vector _tokens1, std::vector _tokens2) {} - -double PrepareDataTokenMetric::getMetric() { - return metric_res; -} - -void LivDistTokenMetric::countMetric() {} - -void WShinglingTokenMetric::countMetric() {} diff --git a/metrics/source/TextMetricImpl.cpp b/metrics/src/TextMetricImpl.cpp similarity index 92% rename from metrics/source/TextMetricImpl.cpp rename to metrics/src/TextMetricImpl.cpp index 4b2cabc641b7152ace52147db89b016f1acdaff3..848c36bf6cfc1da24518fc279cb22275b9c89a26 100644 --- a/metrics/source/TextMetricImpl.cpp +++ b/metrics/src/TextMetricImpl.cpp @@ -30,7 +30,7 @@ std::string PrepareDataTextMetric::deleteComments(const std::string& text) { bool s_comm = false; bool m_comm = false; - for (int i = 0; i < modif.size(); i++){ + for (size_t i = 0; i < modif.size(); i++){ if (s_comm && modif[i] == '\0') s_comm = false; else if (m_comm && modif[i] == '*' && modif[i + 1] == '/') @@ -44,7 +44,7 @@ std::string PrepareDataTextMetric::deleteComments(const std::string& text) { else if (modif[i] != '\0') res += modif[i]; - else if (res.size() > 0 && res[res.size() - 1] != '\n') + else if (!res.empty() && res[res.size() - 1] != '\n') res += '\n'; } return res; @@ -70,10 +70,10 @@ double LevDistTextMetric::getMetric(){ std::vector > lev (n, std::vector (m, 0)); - for (int i = 0; i < n; i++){ - for (int j = 0; j < m; j++){ + for (size_t i = 0; i < n; i++){ + for (size_t j = 0; j < m; j++){ if (std::min(i, j) == 0){ - lev[i][j] = std::max(i, j); + lev[i][j] = static_cast (std::max(i, j)); } else{ x = lev[i-1][j]; diff --git a/metrics/src/TokenMetricImpl.cpp b/metrics/src/TokenMetricImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..70cba714e4d75cdaad0dc1fb56bcad7e83e2ce7f --- /dev/null +++ b/metrics/src/TokenMetricImpl.cpp @@ -0,0 +1,79 @@ +// +// Created by march on 04.05.2023. +// + +#include "TokenMetricLib.h" + +double LevDistTokenMetric::getMetric() { + unsigned long n = tokens1.size(); + unsigned long m = tokens2.size(); + int x, y, z; + + std::vector > lev (n, std::vector (m, 0)); + + for (size_t i = 0; i < n; i++){ + for (size_t j = 0; j < m; j++){ + if (std::min(i, j) == 0){ + lev[i][j] = static_cast (std::max(i, j)); + } + else{ + x = lev[i-1][j]; + y = lev[i][j-1]; + z = lev[i-1][j-1]; + lev[i][j] = std::min(x, std::min(y, z)); + if (tokens1[i] != tokens2[j]){ + lev[i][j]++; + } + } + } + } + + if (n == 0 || m == 0) + return 0; + double res = 1.0 - static_cast (lev[n-1][m-1]) / static_cast (std::max(n ,m)); + return res; +} + +double WShinglingTokenMetric::getMetric() { + unsigned long n = tokens1.size(); + unsigned long m = tokens2.size(); + + if (n == 0 || m == 0 || (n < 3 && m < 3)) + return 0; + + std::vector > sh1; + std::vector > sh2; + + for (size_t i = 0; i < n - 3; i++){ + sh1.emplace_back(tokens1[i], tokens1[i+1], tokens1[i+2]); + } + for (size_t i = 0; i < m - 3; i++){ + sh2.emplace_back(tokens2[i], tokens2[i+1], tokens2[i+2]); + } + + std::set > s1; + std::set > s2; + + for (auto &i : sh1) s1.insert(i); + for (auto &i : sh2) s2.insert(i); + + + std::set> intersect_sets; + set_intersection(s1.begin(), s1.end(), s2.begin(), s2.end(), + std::inserter(intersect_sets, intersect_sets.begin())); + + std::set> union_sets; + set_union(s1.begin(), s1.end(), s2.begin(), s2.end(), + std::inserter(union_sets, union_sets.begin())); + + if (union_sets.empty()) + return 0; + else + return static_cast (intersect_sets.size()) / static_cast (union_sets.size()); + +} + +void PrepareDataTokenMetric::setData(std::vector _tokens1, std::vector _tokens2) { + tokens1 = _tokens1; + tokens2 = _tokens2; +} diff --git a/metrics/tests/CMakeLists.txt b/metrics/tests/CMakeLists.txt index 1bdfef1af440092c627d443fd37e1c9df58ecb3f..0b5a55d4c1cf6aeaa1d772a77018a75d11bd41c3 100644 --- a/metrics/tests/CMakeLists.txt +++ b/metrics/tests/CMakeLists.txt @@ -1,4 +1,6 @@ -add_executable(metrics_test src/text_metrics_tests.cpp) +file(GLOB SOURCES src/*.cpp) + +add_executable(metrics_test ${SOURCES}) target_link_libraries(metrics_test MetricsLib GTest::gtest_main GTest::gmock) diff --git a/metrics/tests/src/main.cpp b/metrics/tests/src/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..70cfa61104ff822a57412778fd4a5de924e2d23c --- /dev/null +++ b/metrics/tests/src/main.cpp @@ -0,0 +1,13 @@ +// +// Created by march on 16.05.2023. +// + +#include +#include + +int main(int argc, char **argv) +{ + ::testing::InitGoogleTest(&argc, argv); + + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/metrics/tests/src/text_metrics_tests.cpp b/metrics/tests/src/text_metrics_tests.cpp index e8317dd28eb99a5603d8b7f836ac5a66144c3586..aae644ddcb51afe1b1bd2e124f56dd5ff02d2513 100644 --- a/metrics/tests/src/text_metrics_tests.cpp +++ b/metrics/tests/src/text_metrics_tests.cpp @@ -9,13 +9,12 @@ #include #include "TextMetricsLib.h" -#include "TokenMetricLib.h" -class LivDistTextMetricTest : public ::testing::Test { +class LevDistTextMetricTest : public ::testing::Test { protected: - std::unique_ptr livDistTextMetric; + std::unique_ptr levDistTextMetric; void SetUp(){ - livDistTextMetric = std::make_unique (); + levDistTextMetric = std::make_unique (); } void TearDown(){} }; @@ -29,27 +28,7 @@ protected: void TearDown(){} }; -class LivDistTokenMetricTest : public ::testing::Test { -protected: - std::unique_ptr livDistTokenMetric; - void SetUp(){ - livDistTokenMetric = std::make_unique (); - } - void TearDown(){} -}; - -class WShinglingTokenMetricTest : public ::testing::Test { -protected: - std::unique_ptr wShinglingTokenMetric; - void SetUp(){ - wShinglingTokenMetric = std::make_unique (); - } - void TearDown(){} -}; - - - -TEST_F(LivDistTextMetricTest, check_eq_progs) { +TEST_F(LevDistTextMetricTest, check_eq_progs) { std::ifstream fin1; fin1.open("src/test-codes/code1.txt"); @@ -57,26 +36,23 @@ TEST_F(LivDistTextMetricTest, check_eq_progs) { (std::istreambuf_iterator() ) ); fin1.close(); - livDistTextMetric->setData(text1, text1); - livDistTextMetric->countMetric(); + levDistTextMetric->setData(text1, text1); - EXPECT_EQ(livDistTextMetric->getMetric(), 1); + EXPECT_EQ(levDistTextMetric->getMetric(), 1); } -TEST_F(LivDistTextMetricTest, check_absolutely_not_eq_progs) { +TEST_F(LevDistTextMetricTest, check_absolutely_not_eq_progs) { - livDistTextMetric->setData("a b c", "d e f g"); - livDistTextMetric->countMetric(); + levDistTextMetric->setData("a b c", "d e f g"); - EXPECT_EQ(livDistTextMetric->getMetric() < 0.5, true); + EXPECT_EQ(levDistTextMetric->getMetric() < 0.5, true); } -TEST_F(LivDistTextMetricTest, test_with_empty_prog) { +TEST_F(LevDistTextMetricTest, test_with_empty_prog) { - livDistTextMetric->setData("a b c", ""); - livDistTextMetric->countMetric(); + levDistTextMetric->setData("a b c", ""); - EXPECT_EQ(livDistTextMetric->getMetric(), 0); + EXPECT_EQ(levDistTextMetric->getMetric(), 0); } TEST_F(JaccardTextMetricTest, check_eq_progs){ @@ -88,7 +64,6 @@ TEST_F(JaccardTextMetricTest, check_eq_progs){ fin1.close(); jaccardTextMetric->setData(text1, text1); - jaccardTextMetric->countMetric(); EXPECT_EQ(jaccardTextMetric->getMetric(), 1); } @@ -96,7 +71,6 @@ TEST_F(JaccardTextMetricTest, check_eq_progs){ TEST_F(JaccardTextMetricTest, check_absolutely_not_eq_progs) { jaccardTextMetric->setData("a b c", "d e f g"); - jaccardTextMetric->countMetric(); EXPECT_EQ(jaccardTextMetric->getMetric(), 0); } @@ -104,83 +78,9 @@ TEST_F(JaccardTextMetricTest, check_absolutely_not_eq_progs) { TEST_F(JaccardTextMetricTest, test_with_empty_prog) { jaccardTextMetric->setData("a b c", ""); - jaccardTextMetric->countMetric(); EXPECT_EQ(jaccardTextMetric->getMetric(), 0); } -TEST_F(LivDistTokenMetricTest, check_eq_progs) { - - std::vector tokens1 = {"a", "b", "c"}; - std::vector tokens2 = {"b", "a", "c"}; - - livDistTokenMetric->setData(tokens1, tokens1); - livDistTokenMetric->countMetric(); - - EXPECT_EQ(livDistTokenMetric->getMetric(), 1); -} - -TEST_F(LivDistTokenMetricTest, check_absolutely_not_eq_progs) { - - std::vector tokens1 = {"a", "b", "c"}; - std::vector tokens2 = {"d", "e", "f", "o"}; - - livDistTokenMetric->setData(tokens1, tokens1); - livDistTokenMetric->countMetric(); - - EXPECT_EQ(livDistTokenMetric->getMetric() < 0.5, true); -} - -TEST_F(LivDistTokenMetricTest, test_with_empty_prog) { - - std::vector tokens1 = {"a", "b", "c"}; - std::vector tokens2 = {}; - - livDistTokenMetric->setData(tokens1, tokens1); - livDistTokenMetric->countMetric(); - - EXPECT_EQ(livDistTokenMetric->getMetric(), 0); -} - -TEST_F(WShinglingTokenMetricTest, check_eq_progs){ - - std::vector tokens1 = {"a", "b", "c"}; - std::vector tokens2 = {"b", "a", "c"}; - - wShinglingTokenMetric->setData(tokens1, tokens1); - wShinglingTokenMetric->countMetric(); - - EXPECT_EQ(wShinglingTokenMetric->getMetric(), 1); -} - -TEST_F(WShinglingTokenMetricTest, check_absolutely_not_eq_progs) { - - std::vector tokens1 = {"a", "b", "c"}; - std::vector tokens2 = {"d", "e", "f", "o"}; - - wShinglingTokenMetric->setData(tokens1, tokens1); - wShinglingTokenMetric->countMetric(); - - EXPECT_EQ(wShinglingTokenMetric->getMetric(), 0); -} - -TEST_F(WShinglingTokenMetricTest, test_with_empty_prog) { - - std::vector tokens1 = {"a", "b", "c"}; - std::vector tokens2 = {}; - - wShinglingTokenMetric->setData(tokens1, tokens1); - wShinglingTokenMetric->countMetric(); - - EXPECT_EQ(wShinglingTokenMetric->getMetric(), 0); -} - -int main(int argc, char **argv) -{ - ::testing::InitGoogleTest(&argc, argv); - - return RUN_ALL_TESTS(); -} - diff --git a/metrics/tests/src/token_metrics_tests.cpp b/metrics/tests/src/token_metrics_tests.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bbc278d18db24569c66b40283b5cd432114ad2cd --- /dev/null +++ b/metrics/tests/src/token_metrics_tests.cpp @@ -0,0 +1,92 @@ +// +// Created by march on 16.05.2023. +// + +#include +#include + +#include + +#include "TokenMetricLib.h" + +class LevDistTokenMetricTest : public ::testing::Test { +protected: + std::unique_ptr levDistTokenMetric; + void SetUp(){ + levDistTokenMetric = std::make_unique (); + } + void TearDown(){} +}; + +class WShinglingTokenMetricTest : public ::testing::Test { +protected: + std::unique_ptr wShinglingTokenMetric; + void SetUp(){ + wShinglingTokenMetric = std::make_unique (); + } + void TearDown(){} +}; + +TEST_F(LevDistTokenMetricTest, check_eq_progs) { + + std::vector tokens1 = {1, 2, 3}; + + levDistTokenMetric->setData(tokens1, tokens1); + + EXPECT_EQ(levDistTokenMetric->getMetric(), 1); +} + +TEST_F(LevDistTokenMetricTest, check_absolutely_not_eq_progs) { + + std::vector tokens1 = {1, 2, 3}; + std::vector tokens2 = {3, 4, 5, 6}; + + levDistTokenMetric->setData(tokens1, tokens2); + + EXPECT_EQ(levDistTokenMetric->getMetric() < 0.5, true); +} + +TEST_F(LevDistTokenMetricTest, test_with_empty_prog) { + + std::vector tokens1 = {1, 2, 3}; + std::vector tokens2 = {}; + + levDistTokenMetric->setData(tokens1, tokens2); + + EXPECT_EQ(levDistTokenMetric->getMetric(), 0); +} + +TEST_F(WShinglingTokenMetricTest, check_eq_progs){ + + std::vector tokens1 = {1, 2, 3, 4, 5, 6}; + std::vector tokens2 = {1, 2, 3, 4, 5, 6}; + + wShinglingTokenMetric->setData(tokens1, tokens1); + + EXPECT_EQ(wShinglingTokenMetric->getMetric(), 1); +} + +TEST_F(WShinglingTokenMetricTest, check_absolutely_not_eq_progs) { + + std::vector tokens1 = {1, 2, 3}; + std::vector tokens2 = {4, 5, 6, 1}; + + wShinglingTokenMetric->setData(tokens1, tokens1); + + EXPECT_EQ(wShinglingTokenMetric->getMetric(), 0); +} + +TEST_F(WShinglingTokenMetricTest, test_with_empty_prog) { + + std::vector tokens1 = {1, 2, 3}; + std::vector tokens2 = {}; + + wShinglingTokenMetric->setData(tokens1, tokens1); + + EXPECT_EQ(wShinglingTokenMetric->getMetric(), 0); +} + + + + +