diff --git a/src/matchcode_toolkit/stemming.py b/src/matchcode_toolkit/stemming.py index 11a7c4c..49282e1 100644 --- a/src/matchcode_toolkit/stemming.py +++ b/src/matchcode_toolkit/stemming.py @@ -149,7 +149,8 @@ def get_stem_code(location): # Apply mutations bottom-up mutations = dict(sorted(mutations.items(), reverse=True)) - text = source.decode() + # Ensure the text always ends with a newline character. + text = source.decode().rstrip("\n") + "\n" cur_count = 0 lines = text.splitlines(keepends=True) successive_line_count = [cur_count := cur_count + len(line) for line in lines] diff --git a/tests/test_stemming.py b/tests/test_stemming.py index cb37693..dd776e0 100644 --- a/tests/test_stemming.py +++ b/tests/test_stemming.py @@ -78,3 +78,9 @@ def test_rust_code_stemming(self): expected_file_location = self.test_data_dir / "rust/metrics-stemmeds.rs" results = stemming.get_stem_code(location=str(file_location)) check_against_expected_code_file(results, expected_file_location) + + def test_javascript_code_stemming_endwith_no_newline(self): + file_location = self.test_data_dir / "javascript/main.js" + expected_file_location = self.test_data_dir / "javascript/main-stemmed.js" + results = stemming.get_stem_code(location=str(file_location)) + check_against_expected_code_file(results, expected_file_location) diff --git a/tests/testfiles/stemming/javascript/main-stemmed.js b/tests/testfiles/stemming/javascript/main-stemmed.js new file mode 100644 index 0000000..0b5742f --- /dev/null +++ b/tests/testfiles/stemming/javascript/main-stemmed.js @@ -0,0 +1,2 @@ +const idf="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890!@#$%^&*()_-+=";function idf(idf){let idf="";for(let idf=0;idf