Commit 9cc070a0 authored by David Geisler

small changes

parent a4c95c09
......@@ -191,7 +191,7 @@ namespace utue::pe::tools::lsh {
*/
Similarity();
[[nodiscard]] std::string toString(const unsigned int& top = 20, int cw = 10) const;
[[nodiscard]] std::string toString(const unsigned int& top = 5, int cw = 10) const;
};
private:
......@@ -231,7 +231,7 @@ namespace utue::pe::tools::lsh {
* @param hashes Number of different hash functions resulting in different permutations.
* @param buckets Number of buckets in the chained hash set of Tokens
*/
MinHash(const unsigned int& hashes = 100, const unsigned int& buckets = 500);
explicit MinHash(const unsigned int& hashes = 100, const unsigned int& buckets = 500);
/**
* Creates a new document
......
Subproject commit 69f6204673e5f42acf45298d47ca55d7618b5a8c
Subproject commit eff728e5b500b51ac3b4f9de99d1d852f70a9ad3
......@@ -101,7 +101,7 @@ namespace utue::pe::tools::lsh {
};
/**
 * Specialization of Scanner::create for Token::NGRAM.
 * Builds an NGramScanner and returns it wrapped in a ScannerPtr.
 * Note the argument order: the NGramScanner constructor receives (length, leaks, weight).
 * @param weight Weight passed on to the NGramScanner (third constructor argument).
 * @param length Passed as the first constructor argument; call sites use it as the n-gram size (e.g. 3 for 3-grams).
 * @param leaks Passed as the second constructor argument; call sites describe it as the number of leaks in the n-gram.
 * @return ScannerPtr owning the newly allocated NGramScanner.
 */
template <>
ScannerPtr Scanner::create<Token::NGRAM>(const float& weight, int length, int leaks) {
ScannerPtr Scanner::create<Token::NGRAM>(const float& weight, unsigned int length, unsigned int leaks) {
return ScannerPtr(new NGramScanner(length,leaks,weight));
}
}
\ No newline at end of file
......@@ -119,13 +119,13 @@ int main(int argc, const char** argv) {
minHash = MinHash::create(hashes,buckets);
// add scanners to tokenize the strings into ngrams
minHash->createScanner<Token::NGRAM>(1.0f,3,0); // extracts 3-grams and weight them by 1.0
minHash->createScanner<Token::NGRAM>(1.0f,4,1); // 4-grams but with one leak
minHash->createScanner<Token::NGRAM>(1.0f,5,2); // 5-grams but with two leaks
minHash->createScanner<Token::NGRAM>(1.0f,3u,0u); // extracts 3-grams and weight them by 1.0
minHash->createScanner<Token::NGRAM>(1.0f,4u,1u); // 4-grams but with one leak
minHash->createScanner<Token::NGRAM>(1.0f,5u,2u); // 5-grams but with two leaks
minHash->createScanner<Token::NGRAM>(1.5f,4,0); // extracts 4-grams and weight them by 2.0
minHash->createScanner<Token::NGRAM>(1.5f,5,1); // 5-grams but with one leak
minHash->createScanner<Token::NGRAM>(1.5f,6,2); // 6-grams but with two leaks
minHash->createScanner<Token::NGRAM>(1.5f,4u,0u); // extracts 4-grams and weights them by 1.5
minHash->createScanner<Token::NGRAM>(1.5f,5u,1u); // 5-grams but with one leak
minHash->createScanner<Token::NGRAM>(1.5f,6u,2u); // 6-grams but with two leaks
//
minHash->createScanner<Token::WORD>(2.0f);
......
......@@ -101,13 +101,13 @@ int main(int argc, const char** argv) {
subsMatch = SubsMatch::create();
// add scanners to tokenize the strings into ngrams
subsMatch->createScanner<Token::NGRAM>(1.0f,3,0); // extracts 3-grams and weight them by 1.0
subsMatch->createScanner<Token::NGRAM>(1.0f,4,1); // 4-grams but with one leak
subsMatch->createScanner<Token::NGRAM>(1.0f,5,2); // 5-grams but with two leaks
subsMatch->createScanner<Token::NGRAM>(1.0f,3u,0u); // extracts 3-grams and weight them by 1.0
subsMatch->createScanner<Token::NGRAM>(1.0f,4u,1u); // 4-grams but with one leak
subsMatch->createScanner<Token::NGRAM>(1.0f,5u,2u); // 5-grams but with two leaks
subsMatch->createScanner<Token::NGRAM>(1.5f,4,0); // extracts 4-grams and weight them by 2.0
subsMatch->createScanner<Token::NGRAM>(1.5f,5,1); // 5-grams but with one leak
subsMatch->createScanner<Token::NGRAM>(1.5f,6,2); // 6-grams but with two leaks
subsMatch->createScanner<Token::NGRAM>(1.5f,4u,0u); // extracts 4-grams and weights them by 1.5
subsMatch->createScanner<Token::NGRAM>(1.5f,5u,1u); // 5-grams but with one leak
subsMatch->createScanner<Token::NGRAM>(1.5f,6u,2u); // 6-grams but with two leaks
//
subsMatch->createScanner<Token::WORD>(2.0f);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment