fix paths in posts.json: store file_link and html_link relative to the output directory

milahu · milahu · commit 8647ab00fc25 · 2025-12-30T12:22:14.000+01:00
diff --git a/src/substack2markdown/substack_scraper.py b/src/substack2markdown/substack_scraper.py
@@ -149,8 +149,6 @@ def get_all_post_urls(self) -> List[str]:
 
     def get_all_post_urls_offline(self) -> List[str]:
         # Read JSON data
-        # NOTE this assumes that $post_slug is not used in args.output_directory_format
-        # because post_slug is undefined at this point
         output_directory = self.output_directory_template.substitute(self.format_vars)
         self.format_vars["output_directory"] = output_directory
         posts_json_path = os.path.join(
@@ -540,6 +538,9 @@ async def scrape_posts(self, num_posts_to_scrape: int = 0) -> None:
         """
         Iterates over all posts and saves them as markdown and html files
         """
+        output_directory = self.output_directory_template.substitute(self.format_vars)
+        self.format_vars["output_directory"] = output_directory
+
         posts_data = []
         count = 0
         total = num_posts_to_scrape if num_posts_to_scrape != 0 else len(self.post_urls)
@@ -548,9 +549,6 @@ async def scrape_posts(self, num_posts_to_scrape: int = 0) -> None:
                 post_slug = url.split("/")[-1]
                 self.format_vars["post_slug"] = post_slug
 
-                output_directory = self.output_directory_template.substitute(self.format_vars)
-                self.format_vars["output_directory"] = output_directory
-
                 md_filepath = os.path.join(
                     output_directory,
                     self.md_path_template.substitute(self.format_vars)
@@ -654,8 +652,8 @@ async def scrape_posts(self, num_posts_to_scrape: int = 0) -> None:
                         "like_count": like_count,
                         "comment_count": comments_num,
                         "date": date,
-                        "file_link": md_filepath,
-                        "html_link": html_filepath
+                        "file_link": os.path.relpath(md_filepath, output_directory),
+                        "html_link": os.path.relpath(html_filepath, output_directory),
                     })
                 else:
                     print(f"File already exists: {md_filepath}")