diff --git a/docs/parallel_config.md b/docs/parallel_config.md index b23b7f514..5e329a792 100644 --- a/docs/parallel_config.md +++ b/docs/parallel_config.md @@ -47,7 +47,7 @@ Validate parameters/structure of configuration data. * **input_dir**: (str, required): path/name of input images directory (validates that it exists). -* **json**: (str, required): path/name of output JSON data file (appends new data if it already exists). +* **results**: (str, required): path/name of output JSON data file (appends new data if it already exists). * **filename_metadata**: (list, required): list of metadata terms used to construct filenames. for example: @@ -206,7 +206,7 @@ config.import_config(config_file="my_config.json") # Change configuration values directly in Python as needed. At a minimum you must specify input_dir, json, filename_metadata, workflow. config.input_dir = "./my_images" -config.json = "output.json" +config.results = "output.json" config.filename_metadata = ["plantbarcode", "timestamp"] config.workflow = "my_workflow.py" diff --git a/docs/pipeline_parallel.md b/docs/pipeline_parallel.md index ffb3a2ee6..b0d18f118 100644 --- a/docs/pipeline_parallel.md +++ b/docs/pipeline_parallel.md @@ -125,7 +125,7 @@ Sample image filename: `cam1_16-08-06-16:45_el1100s1_p19.jpg` ``` { "input_dir": "/shares/mgehan_share/raw_data/raw_image/2016-08_pat-edger/data/split-round1/split-cam1", - "json": "edger-round1-brassica.json", + "results": "edger-round1-brassica.json", "filename_metadata": ["camera", "timestamp", "id", "other"], "workflow": "/home/mgehan/pat-edger/round1-python-pipelines/2016-08_pat-edger_brassica-cam1-splitimg.py", "img_outdir": "/shares/mgehan_share/raw_data/raw_image/2016-08_pat-edger/data/split-round1/split-cam1/output", @@ -203,7 +203,7 @@ in a list to the `filename_metadata` parameter. 
```bash { "input_dir": "input_directory", - "json": "output.json", + "results": "output.json", "filename_metadata": ["camera", "plantbarcode", "timestamp"], "workflow": "user-workflow.py", "img_outdir": "output_directory", @@ -263,7 +263,7 @@ Finally, we filter the basename for top view rgb images with "TV_VIS.*". ```bash { "input_dir": "input_directory", - "json": "output.json", + "results": "output.json", "filename_metadata": [""], "workflow": "user-workflow.py", "img_outdir": "output_directory", @@ -315,7 +315,7 @@ To identify each image within our workflow, we will name them based on the `imgt ``` { "input_dir": "/shares/mgehan_share/raw_data/raw_image/2016-08_pat-edger/data/split-round1/split-cam1", - "json": "edger-round1-brassica.json", + "results": "edger-round1-brassica.json", "filename_metadata": ["imgtype", "timestamp", "id", "other"], "workflow": "/home/mgehan/pat-edger/round1-python-pipelines/2016-08_pat-edger_brassica-cam1-splitimg.py", "img_outdir": "/shares/mgehan_share/raw_data/raw_image/2016-08_pat-edger/data/split-round1/split-cam1/output", diff --git a/docs/updating.md b/docs/updating.md index f9aa86c27..c9adbd762 100644 --- a/docs/updating.md +++ b/docs/updating.md @@ -59,6 +59,10 @@ automatically. 
Alternatively, you can run `pip install -e .` to reinstall the pa ### Breaking changes between v4 and v5 +#### plantcv.parallel.WorkflowConfig + +Renamed the "json" attribute to "results" for clarity about what it controls and for consistency with the new [jupyterconfig](parallel_jupyterconfig.md). + #### plantcv.spectral_index.egi Renamed the input parameter `rgb_img` to `img` to reflect the flexibility of using the [EGI index function](spectral_index.md) diff --git a/plantcv/parallel/cli.py b/plantcv/parallel/cli.py index d89ca019b..326f345f3 100644 --- a/plantcv/parallel/cli.py +++ b/plantcv/parallel/cli.py @@ -39,8 +39,8 @@ def options(): # Import a configuration if provided if args.config: config.import_config(config_file=args.config) - if args.config == config.json: - print("Error: Configuration file would be overwritten by results, change the json field of config.", + if args.config == config.results: + print("Error: Configuration file would be overwritten by results, change the results field of config.", file=sys.stderr) sys.exit(1) diff --git a/plantcv/parallel/jupyterconfig.py b/plantcv/parallel/jupyterconfig.py index 43b3278f9..728b9afa3 100644 --- a/plantcv/parallel/jupyterconfig.py +++ b/plantcv/parallel/jupyterconfig.py @@ -206,7 +206,7 @@ def save_config(self): object.__setattr__(config, attr, getattr(self, attr)) # set a few manually due to property differences config.workflow = self.workflow - config.json = self.results + config.results = self.results # save config.save_config(config_file=self.config) parallel_print("Saved " + self.config, verbose=self.verbose) @@ -225,9 +225,7 @@ def import_config(self, config_file): # Import the JSON configuration data config = json.load(fp) for key, value in config.items(): - if key == "json": - object.__setattr__(self, "results", value) - elif key != "_metadata_terms": + if key != "_metadata_terms": object.__setattr__(self, key, value) @staticmethod diff --git a/plantcv/parallel/process_results.py 
b/plantcv/parallel/process_results.py index 80ed5ad37..292e21bda 100644 --- a/plantcv/parallel/process_results.py +++ b/plantcv/parallel/process_results.py @@ -25,7 +25,7 @@ def process_results(config): # process results from the checkpoint inside start point for tmp dirs job_dir = os.path.join(config.chkpt_start_dir, "_PCV_PARALLEL_CHECKPOINT_") # name outputs from config - json_file = config.json + json_file = config.results # Data dictionary data = {"variables": {}, "entities": []} if os.path.exists(json_file): diff --git a/plantcv/parallel/run_parallel.py b/plantcv/parallel/run_parallel.py index 937d4b88a..4ac9ad314 100644 --- a/plantcv/parallel/run_parallel.py +++ b/plantcv/parallel/run_parallel.py @@ -37,8 +37,8 @@ def run_parallel(config): os.makedirs(config.img_outdir, exist_ok=True) # Remove JSON results file if append=False - if not config.append and os.path.exists(config.json): - os.remove(config.json) + if not config.append and os.path.exists(config.results): + os.remove(config.results) # Read image metadata ########################################### @@ -82,7 +82,7 @@ def run_parallel(config): # Convert results start time convert_results_start_time = time.time() print("Converting json to csv... 
", file=sys.stderr) - plantcv.utils.json2csv(config.json, os.path.splitext(config.json)[0]) + plantcv.utils.json2csv(config.results, os.path.splitext(config.results)[0]) convert_results_clock_time = time.time() - convert_results_start_time parallel_print(f"Processing results took {convert_results_clock_time} seconds.", file=sys.stderr, verbose=verbose) ########################################### diff --git a/plantcv/parallel/workflowconfig.py b/plantcv/parallel/workflowconfig.py index 3998123b9..2accecf06 100644 --- a/plantcv/parallel/workflowconfig.py +++ b/plantcv/parallel/workflowconfig.py @@ -9,7 +9,7 @@ class WorkflowConfig: def __init__(self): object.__setattr__(self, "input_dir", "") - object.__setattr__(self, "json", "") + object.__setattr__(self, "results", "") object.__setattr__(self, "filename_metadata", []) object.__setattr__(self, "workflow", "") object.__setattr__(self, "img_outdir", "./output_images") @@ -99,9 +99,9 @@ def validate_config(self): print(f"Error: input directory (input_dir) is required and {self.input_dir} does not exist.", file=sys.stderr) checks.append(False) - # Validate JSON file - if self.json == "": - print("Error: an output JSON file (json) is required but is currently undefined.", file=sys.stderr) + # Validate JSON results file + if self.results == "": + print("Error: an output JSON file (results) is required but is currently undefined.", file=sys.stderr) checks.append(False) # Validate workflow script if not os.path.exists(self.workflow): @@ -308,7 +308,6 @@ def _config_attr_lookup(config, attr, val): # for all other attributes, get their data from list config_control = { "input_dir": ["Images will be read from {}", str], - "json": ["output will be written to {}", str], "filename_metadata": ["Filenames will be parsed into {}", list], "workflow": ["Will run {} python script in each job", str], "img_outdir": ["Output images will be written to {}", str], diff --git a/tests/parallel/test_cli.py b/tests/parallel/test_cli.py index 
2183fc785..9130b7535 100644 --- a/tests/parallel/test_cli.py +++ b/tests/parallel/test_cli.py @@ -24,7 +24,7 @@ def test_parallel_cli_invalid_config(parallel_test_data, tmpdir): conf_file = tmpdir.mkdir("cache").join("config.json") config = WorkflowConfig() # Set valid values in config - config.json = "valid_config.json" + config.results = "valid_config.json" config.filename_metadata = ["imgtype", "camera", "frame", "zoom", "lifter", "gain", "exposure", "id"] config.workflow = parallel_test_data.workflow_script config.img_outdir = str(conf_file.dirpath()) @@ -43,7 +43,7 @@ def test_parallel_cli_overwriting_config(parallel_test_data, tmpdir): conf_file = tmpdir.mkdir("cache").join("config.json") config = WorkflowConfig() # Set valid values in config - config.json = conf_file.strpath + config.results = conf_file.strpath config.filename_metadata = ["imgtype", "camera", "frame", "zoom", "lifter", "gain", "exposure", "id"] config.workflow = parallel_test_data.workflow_script config.img_outdir = str(conf_file.dirpath()) @@ -65,7 +65,7 @@ def test_parallel_cli_valid_config(parallel_test_data, tmpdir): config = WorkflowConfig() # Set valid values in config config.input_dir = parallel_test_data.flat_imgdir - config.json = conf_file.dirpath().join(os.path.basename(parallel_test_data.new_results_file)).strpath + config.results = conf_file.dirpath().join(os.path.basename(parallel_test_data.new_results_file)).strpath config.filename_metadata = ["imgtype", "camera", "frame", "zoom", "lifter", "gain", "exposure", "id"] config.metadata_regex = {"filepath": ".*"} config.workflow = parallel_test_data.workflow_script diff --git a/tests/parallel/test_job_builder.py b/tests/parallel/test_job_builder.py index 323133755..eca23490b 100644 --- a/tests/parallel/test_job_builder.py +++ b/tests/parallel/test_job_builder.py @@ -9,7 +9,7 @@ def test_job_builder_single_image(parallel_test_data, tmpdir): # Create config instance config = WorkflowConfig() config.input_dir = 
parallel_test_data.snapshot_imgdir - config.json = "output.json" + config.results = "output.json" config.tmp_dir = str(tmp_dir) config.filename_metadata = ["imgtype", "camera", "rotation", "zoom", "lifter", "gain", "exposure", "id"] config.workflow = parallel_test_data.workflow_script @@ -41,7 +41,7 @@ def test_job_builder_coprocess(parallel_test_data, tmpdir): # Create config instance config = WorkflowConfig() config.input_dir = parallel_test_data.snapshot_imgdir - config.json = "output.json" + config.results = "output.json" config.tmp_dir = str(tmp_dir) config.filename_metadata = ["imgtype", "camera", "rotation", "zoom", "lifter", "gain", "exposure", "id"] config.workflow = parallel_test_data.workflow_script @@ -74,7 +74,7 @@ def test_job_builder_auto_name(parallel_test_data, tmpdir): # Create config instance config = WorkflowConfig() config.input_dir = parallel_test_data.snapshot_imgdir - config.json = "output.json" + config.results = "output.json" config.tmp_dir = str(tmp_dir) config.filename_metadata = ["imgtype", "camera", "rotation", "zoom", "lifter", "gain", "exposure", "id"] config.workflow = parallel_test_data.workflow_script diff --git a/tests/parallel/test_jupyterconfig.py b/tests/parallel/test_jupyterconfig.py index 862af3dea..41d0a33af 100644 --- a/tests/parallel/test_jupyterconfig.py +++ b/tests/parallel/test_jupyterconfig.py @@ -79,7 +79,7 @@ def test_jupcon_run(parallel_test_data, tmpdir): jupcon.notebook = jupcon.find_notebook() jupcon.input_dir = parallel_test_data.flat_imgdir jupcon.workflow = "example.py" - jupcon.results = "example" + jupcon.results = "example.json" jupcon.run() assert os.path.exists(jupcon.results) diff --git a/tests/parallel/test_parsers.py b/tests/parallel/test_parsers.py index b80af5bae..0db85667d 100644 --- a/tests/parallel/test_parsers.py +++ b/tests/parallel/test_parsers.py @@ -12,7 +12,7 @@ def test_metadata_parser_snapshots(parallel_test_data, imgformat): # Create config instance config = WorkflowConfig() 
config.input_dir = parallel_test_data.snapshot_imgdir - config.json = "output.json" + config.results = "output.json" config.filename_metadata = ["imgtype", "camera", "frame", "zoom", "lifter", "gain", "exposure", "id"] config.workflow = parallel_test_data.workflow_script config.metadata_filters = {"imgtype": "VIS", "camera": "SV"} @@ -37,7 +37,7 @@ def test_metadata_parser_images(parallel_test_data, subdirs, imgformat, outlengt # Create config instance config = WorkflowConfig() config.input_dir = parallel_test_data.flat_imgdir - config.json = "output.json" + config.results = "output.json" config.filename_metadata = ["imgtype", "camera", "frame", "zoom", "lifter", "gain", "exposure", "id"] config.workflow = parallel_test_data.workflow_script config.metadata_filters = {"imgtype": "VIS"} @@ -57,7 +57,7 @@ def test_metadata_parser_phenodata(parallel_test_data): # Create config instance config = WorkflowConfig() config.input_dir = parallel_test_data.phenodata_dir - config.json = "output.json" + config.results = "output.json" config.workflow = parallel_test_data.workflow_script config.imgformat = "jpg" @@ -85,7 +85,7 @@ def test_read_checkpoint_data(parallel_test_data): os.chdir(parallel_test_data.datadir) config = WorkflowConfig() config.input_dir = parallel_test_data.phenodata_dir - config.json = "output.json" + config.results = "output.json" config.workflow = parallel_test_data.workflow_script config.imgformat = "jpg" config.checkpoint = True diff --git a/tests/parallel/test_process_results.py b/tests/parallel/test_process_results.py index 4e26532de..6e6ee988e 100644 --- a/tests/parallel/test_process_results.py +++ b/tests/parallel/test_process_results.py @@ -11,7 +11,7 @@ def test_process_results(parallel_test_data, tmpdir): config = type("smallconfig", (), {"tmp_dir": parallel_test_data.parallel_results_dir, "checkpoint": False, - "json": result_file}) + "results": result_file}) # Run twice to create appended results process_results(config) process_results(config) 
@@ -28,7 +28,7 @@ def test_process_results_new_output(parallel_test_data, tmpdir): config = type("smallconfig", (), {"tmp_dir": parallel_test_data.parallel_results_dir, "checkpoint": False, - "json": result_file}) + "results": result_file}) process_results(config) # Assert output matches expected values @@ -42,7 +42,7 @@ def test_process_results_valid_json(parallel_test_data): config = type("smallconfig", (), {"tmp_dir": parallel_test_data.parallel_results_dir, "checkpoint": "false", - "json": parallel_test_data.valid_json_file}) + "results": parallel_test_data.valid_json_file}) # Test when the file is a valid json file but doesn't contain expected keys with pytest.raises(RuntimeError): process_results(config) @@ -56,6 +56,6 @@ def test_process_results_invalid_json(tmpdir): config = type("smallconfig", (), {"tmp_dir": os.path.split(str(result_file))[0], "checkpoint": "false", - "json": result_file}) + "results": result_file}) with pytest.raises(RuntimeError): process_results(config) diff --git a/tests/parallel/test_workflowconfig.py b/tests/parallel/test_workflowconfig.py index c50d0a1a9..0bc23ae8d 100644 --- a/tests/parallel/test_workflowconfig.py +++ b/tests/parallel/test_workflowconfig.py @@ -43,7 +43,7 @@ def test_validate_config(parallel_test_data, tmpdir): config = WorkflowConfig() # Set valid values in config config.input_dir = parallel_test_data.flat_imgdir - config.json = "valid_config.json" + config.results = "valid_config.json" config.filename_metadata = ["imgtype", "camera", "frame", "zoom", "lifter", "gain", "exposure", "id"] config.workflow = parallel_test_data.workflow_script config.img_outdir = str(img_outdir) @@ -59,7 +59,7 @@ def test_invalid_startdate(parallel_test_data, tmpdir): config = WorkflowConfig() # Set valid values in config config.input_dir = parallel_test_data.flat_imgdir - config.json = "valid_config.json" + config.results = "valid_config.json" config.filename_metadata = ["imgtype", "camera", "frame", "zoom", "lifter", "gain", 
"exposure", "id"] config.workflow = parallel_test_data.workflow_script config.img_outdir = str(img_outdir) @@ -76,7 +76,7 @@ def test_invalid_enddate(parallel_test_data, tmpdir): config = WorkflowConfig() # Set valid values in config config.input_dir = config.input_dir = parallel_test_data.flat_imgdir - config.json = "valid_config.json" + config.results = "valid_config.json" config.filename_metadata = ["imgtype", "camera", "frame", "zoom", "lifter", "gain", "exposure", "id"] config.workflow = config.workflow = parallel_test_data.workflow_script config.img_outdir = str(img_outdir) @@ -112,7 +112,7 @@ def test_too_many_cluster_config_cores(parallel_test_data): # Create config instance config = WorkflowConfig() config.input_dir = config.input_dir = parallel_test_data.flat_imgdir - config.json = "valid_config.json" + config.results = "valid_config.json" config.workflow = config.workflow = parallel_test_data.workflow_script # Set invalid values in config # input_dir and json are not defined by default, but are required diff --git a/tests/testdata/workflowconfig_template.json b/tests/testdata/workflowconfig_template.json index 75555258c..c2aa846a9 100644 --- a/tests/testdata/workflowconfig_template.json +++ b/tests/testdata/workflowconfig_template.json @@ -1,6 +1,6 @@ { "input_dir": "", - "json": "", + "results": "", "filename_metadata": [], "include_all_subdirs": true, "workflow": "",