KnowledgeCaptureAndDiscovery · dgarijo · Dec 17, 2025 · Dec 17, 2025 · Dec 17, 2025 · Dec 17, 2025
diff --git a/src/somef/process_repository.py b/src/somef/process_repository.py
@@ -774,10 +774,17 @@ def download_github_files(directory, owner, repo_name, repo_ref, authorization):
     with open(repo_zip_file, "wb") as f:
         f.write(repo_zip)
 
-    with zipfile.ZipFile(repo_zip_file, "r") as zip_ref:
-        zip_ref.extractall(repo_extract_dir)
-
+    try:
+        with zipfile.ZipFile(repo_zip_file, "r") as zip_ref: 
+            zip_ref.extractall(repo_extract_dir) 
+    except zipfile.BadZipFile: 
+        logging.error("Downloaded archive is not a valid zip (repo may be empty)") 
+        return None
+
     repo_folders = os.listdir(repo_extract_dir)
+    if not repo_folders: 
+        logging.warning("Repository archive is empty") 
+        return None
 
     repo_dir = os.path.join(repo_extract_dir, repo_folders[0])
     return repo_dir

diff --git a/src/somef/test/test_JSON_export.py b/src/somef/test/test_JSON_export.py
@@ -407,36 +407,36 @@ def test_issue_830(self):
     #         except Exception as e:
     #             print(f"Failed to delete {cls.json_file}: {e}")  
 
-    # def test_issue_862(self):
-    #     """Checks if this repository does not gets stuck when labeling headers"""
-    #     somef_cli.run_cli(threshold=0.8,
-    #                         ignore_classifiers=False,
-    #                         repo_url=None,
-    #                         local_repo=test_data_repositories + "componentInstaller",
-    #                         doc_src=None,
-    #                         in_file=None,
-    #                         output=test_data_path + "test_issue_862.json",
-    #                         graph_out=None,
-    #                         graph_format="turtle",
-    #                         codemeta_out=None,
-    #                         pretty=True,
-    #                         missing=False,
-    #                         readme_only=False)
+    def test_issue_862(self):
+        """Checks that this repository does not get stuck when labeling headers."""
+        somef_cli.run_cli(threshold=0.8,
+                            ignore_classifiers=False,
+                            repo_url=None,
+                            local_repo=test_data_repositories + "componentInstaller",
+                            doc_src=None,
+                            in_file=None,
+                            output=test_data_path + "test_issue_862.json",
+                            graph_out=None,
+                            graph_format="turtle",
+                            codemeta_out=None,
+                            pretty=True,
+                            missing=False,
+                            readme_only=False)
 
-    #     text_file = open(test_data_path + "test_issue_862.json", "r")
-    #     data = text_file.read()
-    #     text_file.close()
-    #     json_content = json.loads(data)
+        # Load the exported JSON; the context manager closes the handle even
+        # if reading or parsing raises.
+        with open(test_data_path + "test_issue_862.json", "r") as text_file:
+            json_content = json.load(text_file)
 
-    #     assert "description" in json_content, "Missing 'description' property" 
+        assert "description" in json_content, "Missing 'description' property"
 
-    #     assert len(json_content["description"]) > 0, "Description list is empty" 
+        assert len(json_content["description"]) > 0, "Description list is empty"
 
-    #     first_desc = json_content["description"][0]["result"] 
-    #     assert "value" in first_desc, "Missing 'value' in description result" 
-    #     assert first_desc["value"], "Description 'value' is empty"    
+        first_desc = json_content["description"][0]["result"]
+        assert "value" in first_desc, "Missing 'value' in description result"
+        assert first_desc["value"], "Description 'value' is empty"
 
-    #     os.remove(test_data_path + "test_issue_862.json")
+        os.remove(test_data_path + "test_issue_862.json")
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/src/somef/utils/constants.py b/src/somef/utils/constants.py
@@ -24,7 +24,9 @@
 REGEXP_PYPI_2 = "[![Latest PyPI version]"
 REGEXP_COLAB = "https://colab.research.google.com/drive"
 # needed to cleanup bibtext files.
-REGEXP_BIBTEX = r'\@[a-zA-Z]+\{[.\n\S\s]+?[author|title][.\n\S\s]+?[author|title][.\n\S\s]+?\n\}'
+# Groups are non-capturing so re.findall()/re.split() return the matched entry text, not the alternation.
+REGEXP_BIBTEX = r'@[a-zA-Z]+\{[\s\S]*?(?:author|title)[\s\S]*?(?:author|title)[\s\S]*?\}'
+
 REGEXP_DOI = r'\[\!\[DOI\]([^\]]+)\]\(([^)]+)\)'
 REGEXP_LINKS = r"\[(.*?)?\]\(([^)]+)\)"
 REGEXP_IMAGES = r"!\[(.*?)?\]\((.*?)?\)"