Coverage for peakipy/utils.py: 98%

129 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-15 20:42 -0400

1import sys 

2import json 

3from datetime import datetime 

4from pathlib import Path 

5from typing import List 

6import shutil 

7 

8from rich import print 

9from rich.table import Table 

10 

# for printing dataframes
peaklist_columns_for_printing = ["INDEX", "ASS", "X_PPM", "Y_PPM", "CLUSTID", "MEMCNT"]
# columns shown when highlighting problematic fit parameters
bad_column_selection = [
    "clustid",
    "amp",
    "center_x_ppm",
    "center_y_ppm",
    "fwhm_x_hz",
    "fwhm_y_hz",
    "lineshape",
]
# rich styles paired positionally with bad_column_selection (same length);
# NOTE(review): "yellow"/"red" repeat — presumably intentional to reach 7
# entries matching the 7 columns above; confirm before deduplicating
bad_color_selection = [
    "green",
    "blue",
    "yellow",
    "red",
    "yellow",
    "red",
    "magenta",
]

31 

32 

def mkdir_tmp_dir(base_path: Path = Path("./")):
    """Ensure a ``tmp`` directory exists under *base_path* and return its path."""
    tmp_path = base_path / "tmp"
    # exist_ok=True makes repeated calls a no-op
    tmp_path.mkdir(exist_ok=True)
    return tmp_path

37 

38 

def create_log_path(base_path: Path = Path("./")):
    """Return the path of the run log file inside *base_path*."""
    log_path = base_path / "run_log.txt"
    return log_path

41 

42 

def run_log(log_name="run_log.txt"):
    """Append a time-stamped record of how the script was invoked.

    Parameters
    ----------
    log_name : str or Path
        Log file to append to (created if missing).
    """
    # Copy sys.argv before trimming the script path: the original code
    # assigned through an alias (`sys_argv = sys.argv`), mutating the
    # global argument list for the rest of the program.
    argv = list(sys.argv)
    argv[0] = Path(argv[0]).name
    run_args = " ".join(argv)
    time_stamp = datetime.now().strftime("%A %d %B %Y at %H:%M")
    with open(log_name, "a") as log:
        log.write(f"# Script run on {time_stamp}:\n{run_args}\n")

52 

53 

54def df_to_rich_table(df, title: str, columns: List[str], styles: str): 

55 """Print dataframe using rich library 

56 

57 Parameters 

58 ---------- 

59 df : pandas.DataFrame 

60 title : str 

61 title of table 

62 columns : List[str] 

63 list of column names (must be in df) 

64 styles : List[str] 

65 list of styles in same order as columns 

66 """ 

67 table = Table(title=title) 

68 for col, style in zip(columns, styles): 

69 table.add_column(col, style=style) 

70 for _, row in df.iterrows(): 

71 row = row[columns].values 

72 str_row = [] 

73 for i in row: 

74 match i: 

75 case str(): 

76 str_row.append(f"{i}") 

77 case float() if i > 1e5: 

78 str_row.append(f"{i:.1e}") 

79 case float(): 

80 str_row.append(f"{i:.3f}") 

81 case bool(): 

82 str_row.append(f"{i}") 

83 case int(): 

84 str_row.append(f"{i}") 

85 table.add_row(*str_row) 

86 return table 

87 

88 

def load_config(config_path):
    """Load a JSON config file, returning {} when the file does not exist."""
    if not config_path.exists():
        return {}
    with open(config_path) as opened_config:
        return json.load(opened_config)

96 

97 

def write_config(config_path, config_dic):
    """
    Write a configuration dictionary to a JSON file.

    Parameters
    ----------
    config_path : Path
        Path to where the config should be saved.
    config_dic : dict
        Dictionary containing configuration parameters to write to the file.
    """
    serialized = json.dumps(config_dic, sort_keys=True, indent=4)
    with open(config_path, "w") as config:
        config.write(serialized)

111 

112 

def update_config_file(config_path, config_kvs):
    """Merge *config_kvs* into the config at *config_path*, persist, and return it."""
    merged = load_config(config_path)
    merged.update(config_kvs)
    write_config(config_path, merged)
    return merged

118 

119 

def update_args_with_values_from_config_file(args, config_path="peakipy.config"):
    """read a peakipy config file, extract params and update args dict

    :param args: dict containing params extracted from docopt command line
    :type args: dict
    :param config_path: path to peakipy config file [default: peakipy.config]
    :type config_path: str

    :returns args: updated args dict
    :rtype args: dict
    :returns config: dict that resulted from reading config file
    :rtype config: dict

    """
    config_path = Path(config_path)
    # defaults used when the config file is missing or unreadable
    noise = False
    colors = args.get("colors", ("#5e3c99", "#e66101"))
    config = {}
    if not config_path.exists():
        print(
            "[red]No peakipy.config found - maybe you need to generate one with peakipy read or see docs[/red]"
        )
    else:
        try:
            config = load_config(config_path)
            print(
                f"[green]Using config file with dims [yellow]{config.get('dims')}[/yellow][/green]"
            )
            args["dims"] = config.get("dims", (0, 1, 2))
            noise = config.get("noise")
            if noise:
                noise = float(noise)
            colors = config.get("colors", ["#5e3c99", "#e66101"])
        except json.decoder.JSONDecodeError:
            print(
                "[red]Your peakipy.config file is corrupted - maybe your JSON is not correct...[/red]"
            )
            print("[red]Not using[/red]")
            noise = False
            colors = args.get("colors", ("#5e3c99", "#e66101"))
            config = {}
    args["noise"] = noise
    args["colors"] = colors
    return args, config

168 

169 

def update_linewidths_from_hz_to_points(peakipy_data):
    """Recompute point-unit linewidths (XW/YW) from the Hz columns.

    Needed in case linewidths were adjusted when running edit.py.
    """
    df = peakipy_data.df
    df["XW"] = df["XW_HZ"] * peakipy_data.pt_per_hz_f2
    df["YW"] = df["YW_HZ"] * peakipy_data.pt_per_hz_f1
    return peakipy_data

175 

176 

def update_peak_positions_from_ppm_to_points(peakipy_data):
    """Convert peak positions from ppm back to points.

    Needed in case positions were adjusted when running edit.py. Integer
    point positions go to X_AXIS/Y_AXIS; fractional ones (via the unit
    converters' ``.f`` method) go to X_AXISf/Y_AXISf.
    """
    df = peakipy_data.df
    uc_x = peakipy_data.uc_f2
    uc_y = peakipy_data.uc_f1
    df["X_AXIS"] = df.X_PPM.apply(lambda ppm: uc_x(ppm, "PPM"))
    df["Y_AXIS"] = df.Y_PPM.apply(lambda ppm: uc_y(ppm, "PPM"))
    df["X_AXISf"] = df.X_PPM.apply(lambda ppm: uc_x.f(ppm, "PPM"))
    df["Y_AXISf"] = df.Y_PPM.apply(lambda ppm: uc_y.f(ppm, "PPM"))
    return peakipy_data

192 

193 

def check_for_existing_output_file_and_backup(outname: Path) -> Path:
    """Back up an existing output file before it gets overwritten.

    The backup keeps the original extension (``fits.csv`` ->
    ``fits.csv.bak``). The previous ``with_suffix(".bak")`` replaced the
    extension, so distinct outputs like ``fits.csv`` and ``fits.tab``
    both collapsed to ``fits.bak`` and clobbered each other's backup.

    Parameters
    ----------
    outname : Path
        Intended output path.

    Returns
    -------
    Path
        *outname*, unchanged, for call chaining.
    """
    if outname.exists():
        backup = outname.with_suffix(outname.suffix + ".bak")
        shutil.copy(outname, backup)
    return outname

200 

201def save_data(df, output_name): 

202 suffix = output_name.suffix 

203 

204 if suffix == ".csv": 

205 df.to_csv(output_name, float_format="%.4f", index=False) 

206 

207 elif suffix == ".tab": 

208 df.to_csv(output_name, sep="\t", float_format="%.4f", index=False) 

209 

210 else: 

211 df.to_pickle(output_name) 

212 

213 

def check_data_shape_is_consistent_with_dims(peakipy_data):
    """Exit the program if the declared dims do not match the data rank.

    Compares ``len(peakipy_data.dims)`` against the number of axes in
    ``peakipy_data.data``; a mismatch is unrecoverable, so print a
    message and terminate.
    """
    if len(peakipy_data.dims) != len(peakipy_data.data.shape):
        print(
            f"Dims are {peakipy_data.dims} while data shape is {peakipy_data.data.shape}?"
        )
        # sys.exit() instead of the bare site-module exit(): it is the
        # documented API and works under `python -S` / frozen builds.
        sys.exit()

221 

222 

def check_for_include_column_and_add_if_missing(peakipy_data):
    """Ensure the peak list has an ``include`` column, defaulting to "yes".

    Older peak lists (pre the include/exclude feature) lack the column;
    add it so downstream filtering works uniformly.
    """
    if "include" not in peakipy_data.df.columns:
        # Scalar broadcast assignment replaces the original row-wise
        # `df.apply(lambda _: "yes", axis=1)`, which built the same
        # constant column one row at a time.
        peakipy_data.df["include"] = "yes"
    return peakipy_data

231 

232 

def remove_excluded_peaks(peakipy_data):
    """Drop peaks whose ``include`` column is not "yes".

    Any excluded peaks are printed as a yellow rich table before they
    are removed from ``peakipy_data.df``.
    """
    excluded_mask = peakipy_data.df.include != "yes"
    if excluded_mask.any():
        excluded = peakipy_data.df.loc[excluded_mask, peaklist_columns_for_printing]
        table = df_to_rich_table(
            excluded,
            title="[yellow] Excluded peaks [/yellow]",
            columns=excluded.columns,
            styles=["yellow"] * len(excluded.columns),
        )
        print(table)
        peakipy_data.df = peakipy_data.df[peakipy_data.df.include == "yes"]
    return peakipy_data

247 

248 

def warn_if_trying_to_fit_large_clusters(max_cluster_size, peakipy_data):
    """Resolve the maximum cluster size to fit, warning about large clusters.

    Parameters
    ----------
    max_cluster_size : int or None
        User-supplied limit; when None the largest cluster in the peak
        list is used (with a warning if it exceeds 10 peaks).
    peakipy_data :
        Object whose ``df`` DataFrame has a MEMCNT (cluster size) column.

    Returns
    -------
    The effective maximum cluster size.
    """
    if max_cluster_size is None:
        # fall back to the largest cluster present in the peak list
        largest_cluster = peakipy_data.df.MEMCNT.max()
        max_cluster_size = largest_cluster
        if largest_cluster > 10:
            print(
                f"""[red]
                ##################################################################
                You have some clusters of as many as {max_cluster_size} peaks.
                You may want to consider reducing the size of your clusters as the
                fits will struggle.

                Otherwise you can use the --max-cluster-size flag to exclude large
                clusters
                ##################################################################
                [/red]"""
            )
    # (removed redundant `else: max_cluster_size = max_cluster_size` no-op)
    return max_cluster_size