@@ -83,7 +83,7 @@ def create_transcript_gene_map( # noqa: D417
8383 transcript_gene_map .columns = pd .Index (columns )
8484
8585 if rename_columns :
86- transcript_gene_map .rename (
86+ transcript_gene_map = transcript_gene_map .rename (
8787 columns = {
8888 "ensembl_transcript_id" : "transcript_id" ,
8989 "external_transcript_name" : "transcript_name" ,
@@ -95,12 +95,11 @@ def create_transcript_gene_map( # noqa: D417
9595 else "gene_id"
9696 ),
9797 },
98- inplace = True ,
9998 )
10099
101- transcript_gene_map .dropna (inplace = True )
102- transcript_gene_map .drop_duplicates (inplace = True )
103- transcript_gene_map .reset_index (drop = True , inplace = True )
100+ transcript_gene_map = transcript_gene_map .dropna ()
101+ transcript_gene_map = transcript_gene_map .drop_duplicates ()
102+ transcript_gene_map = transcript_gene_map .reset_index (drop = True )
104103
105104 return transcript_gene_map
106105
@@ -160,7 +159,7 @@ def create_transcript_gene_map_from_annotation( # noqa: D417
160159 """
161160 assert source_field != target_field , "The source_field and target_field must be different."
162161
163- transcript_gene_map = pd .DataFrame ( columns = [ "transcript_id" , "gene_id" , "gene_name" , "gene_biotype" ])
162+ transcript_gene_map_chunks : list [ pd .DataFrame ] = []
164163
165164 if "field" in kwargs :
166165 warning ("The field argument is deprecated. Please use the source_field and target_field arguments instead." )
@@ -207,15 +206,14 @@ def create_transcript_gene_map_from_annotation( # noqa: D417
207206 lambda x : (re .findall (rf'{ column } "([^"]*)"' , x )[0 ] if rf'{ column } "' in x else "" )
208207 )
209208
210- chunk .drop ("attribute" , axis = 1 , inplace = True )
209+ chunk = chunk .drop ("attribute" , axis = 1 )
211210
212- transcript_gene_map = pd .concat (
213- [
214- transcript_gene_map ,
215- chunk [["transcript_id" , "transcript_name" , "gene_id" , "gene_name" , "gene_biotype" ]],
216- ]
211+ transcript_gene_map_chunks .append (
212+ chunk [["transcript_id" , "transcript_name" , "gene_id" , "gene_name" , "gene_biotype" ]]
217213 )
218214
215+ transcript_gene_map = pd .concat (transcript_gene_map_chunks , ignore_index = True )
216+
219217 # Replace the gene_name with the gene_id where the gene_name is ""
220218 transcript_gene_map ["gene_name" ] = np .where (
221219 transcript_gene_map ["gene_name" ] == "" ,
@@ -225,8 +223,8 @@ def create_transcript_gene_map_from_annotation( # noqa: D417
225223
226224 # If only the transcript_name is present, we can drop the id and rename the transcript_name to transcript_id
227225 if source_field == "transcript_name" and use_transcript_name_as_replacement_id :
228- transcript_gene_map .drop ("transcript_id" , axis = 1 , inplace = True )
229- transcript_gene_map .rename (columns = {"transcript_name" : "transcript_id" }, inplace = True )
226+ transcript_gene_map = transcript_gene_map .drop ("transcript_id" , axis = 1 )
227+ transcript_gene_map = transcript_gene_map .rename (columns = {"transcript_name" : "transcript_id" })
230228
231229 source_field = "transcript_id"
232230
@@ -244,8 +242,8 @@ def create_transcript_gene_map_from_annotation( # noqa: D417
244242 ),
245243 )
246244
247- transcript_gene_map .drop ("gene_id" , axis = 1 , inplace = True )
248- transcript_gene_map .rename (columns = {"gene_name" : "gene_id" }, inplace = True )
245+ transcript_gene_map = transcript_gene_map .drop ("gene_id" , axis = 1 )
246+ transcript_gene_map = transcript_gene_map .rename (columns = {"gene_name" : "gene_id" })
249247
250248 if isinstance (target_field , list ):
251249 target_field = [field if field != "gene_name" else "gene_id" for field in target_field ]
@@ -255,14 +253,14 @@ def create_transcript_gene_map_from_annotation( # noqa: D417
255253 fields_to_keep = [source_field , * target_field ] if isinstance (target_field , list ) else [source_field , target_field ]
256254
257255 transcript_gene_map = transcript_gene_map [fields_to_keep ]
258- transcript_gene_map .replace ("" , np . nan , inplace = True )
259- transcript_gene_map .dropna (inplace = True )
256+ transcript_gene_map = transcript_gene_map .replace ("" , pd . NA )
257+ transcript_gene_map = transcript_gene_map .dropna ()
260258
261259 if source_field == "transcript_id" and (
262260 target_field == "gene_id" or (isinstance (target_field , list ) and "gene_id" in target_field )
263261 ):
264- transcript_gene_map .drop_duplicates (subset = ["transcript_id" , "gene_id" ], inplace = True )
262+ transcript_gene_map = transcript_gene_map .drop_duplicates (subset = ["transcript_id" , "gene_id" ])
265263
266- transcript_gene_map .reset_index (drop = True , inplace = True )
264+ transcript_gene_map = transcript_gene_map .reset_index (drop = True )
267265
268266 return transcript_gene_map
0 commit comments