Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/parallel_config.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ Validate parameters/structure of configuration data.
* **input_dir**: (str, required): path/name of input images directory (validates that it exists).


* **json**: (str, required): path/name of output JSON data file (appends new data if it already exists).
* **results**: (str, required): path/name of output JSON data file (appends new data if it already exists).


* **filename_metadata**: (list, required): list of metadata terms used to construct filenames. for example:
Expand Down Expand Up @@ -206,7 +206,7 @@ config.import_config(config_file="my_config.json")

# Change configuration values directly in Python as needed. At a minimum you must specify input_dir, results, filename_metadata, workflow.
config.input_dir = "./my_images"
config.json = "output.json"
config.results = "output.json"
config.filename_metadata = ["plantbarcode", "timestamp"]
config.workflow = "my_workflow.py"

Expand Down
8 changes: 4 additions & 4 deletions docs/pipeline_parallel.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ Sample image filename: `cam1_16-08-06-16:45_el1100s1_p19.jpg`
```
{
"input_dir": "/shares/mgehan_share/raw_data/raw_image/2016-08_pat-edger/data/split-round1/split-cam1",
"json": "edger-round1-brassica.json",
"results": "edger-round1-brassica.json",
"filename_metadata": ["camera", "timestamp", "id", "other"],
"workflow": "/home/mgehan/pat-edger/round1-python-pipelines/2016-08_pat-edger_brassica-cam1-splitimg.py",
"img_outdir": "/shares/mgehan_share/raw_data/raw_image/2016-08_pat-edger/data/split-round1/split-cam1/output",
Expand Down Expand Up @@ -203,7 +203,7 @@ in a list to the `filename_metadata` parameter.
```bash
{
"input_dir": "input_directory",
"json": "output.json",
"results": "output.json",
"filename_metadata": ["camera", "plantbarcode", "timestamp"],
"workflow": "user-workflow.py",
"img_outdir": "output_directory",
Expand Down Expand Up @@ -263,7 +263,7 @@ Finally, we filter the basename for top view rgb images with "TV_VIS.*".
```bash
{
"input_dir": "input_directory",
"json": "output.json",
"results": "output.json",
"filename_metadata": [""],
"workflow": "user-workflow.py",
"img_outdir": "output_directory",
Expand Down Expand Up @@ -315,7 +315,7 @@ To identify each image within our workflow, we will name them based on the `imgt
```
{
"input_dir": "/shares/mgehan_share/raw_data/raw_image/2016-08_pat-edger/data/split-round1/split-cam1",
"json": "edger-round1-brassica.json",
"results": "edger-round1-brassica.json",
"filename_metadata": ["imgtype", "timestamp", "id", "other"],
"workflow": "/home/mgehan/pat-edger/round1-python-pipelines/2016-08_pat-edger_brassica-cam1-splitimg.py",
"img_outdir": "/shares/mgehan_share/raw_data/raw_image/2016-08_pat-edger/data/split-round1/split-cam1/output",
Expand Down
4 changes: 4 additions & 0 deletions docs/updating.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ automatically. Alternatively, you can run `pip install -e .` to reinstall the pa

### Breaking changes between v4 and v5 <a name="breaking-changes"></a>

#### plantcv.parallel.WorkflowConfig

Renamed the "json" attribute to "results" for clarity about what it controls and for consistency with the new [jupyterconfig](parallel_jupyterconfig.md).

#### plantcv.spectral_index.egi

Renamed the input parameter `rgb_img` to `img` to reflect the flexibility of using the [EGI index function](spectral_index.md).
Expand Down
4 changes: 2 additions & 2 deletions plantcv/parallel/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ def options():
# Import a configuration if provided
if args.config:
config.import_config(config_file=args.config)
if args.config == config.json:
print("Error: Configuration file would be overwritten by results, change the json field of config.",
if args.config == config.results:
print("Configuration file would be overwritten by results, change the results field of config.",
file=sys.stderr)
sys.exit(1)

Expand Down
6 changes: 2 additions & 4 deletions plantcv/parallel/jupyterconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ def save_config(self):
object.__setattr__(config, attr, getattr(self, attr))
# set a few manually due to property differences
config.workflow = self.workflow
config.json = self.results
config.results = self.results
# save
config.save_config(config_file=self.config)
parallel_print("Saved " + self.config, verbose=self.verbose)
Expand All @@ -225,9 +225,7 @@ def import_config(self, config_file):
# Import the JSON configuration data
config = json.load(fp)
for key, value in config.items():
if key == "json":
object.__setattr__(self, "results", value)
elif key != "_metadata_terms":
if key != "_metadata_terms":
object.__setattr__(self, key, value)

@staticmethod
Expand Down
2 changes: 1 addition & 1 deletion plantcv/parallel/process_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def process_results(config):
# process results from the checkpoint inside start point for tmp dirs
job_dir = os.path.join(config.chkpt_start_dir, "_PCV_PARALLEL_CHECKPOINT_")
# name outputs from config
json_file = config.json
json_file = config.results
# Data dictionary
data = {"variables": {}, "entities": []}
if os.path.exists(json_file):
Expand Down
6 changes: 3 additions & 3 deletions plantcv/parallel/run_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ def run_parallel(config):
os.makedirs(config.img_outdir, exist_ok=True)

# Remove JSON results file if append=False
if not config.append and os.path.exists(config.json):
os.remove(config.json)
if not config.append and os.path.exists(config.results):
os.remove(config.results)

# Read image metadata
###########################################
Expand Down Expand Up @@ -82,7 +82,7 @@ def run_parallel(config):
# Convert results start time
convert_results_start_time = time.time()
print("Converting json to csv... ", file=sys.stderr)
plantcv.utils.json2csv(config.json, os.path.splitext(config.json)[0])
plantcv.utils.json2csv(config.results, os.path.splitext(config.results)[0])
convert_results_clock_time = time.time() - convert_results_start_time
parallel_print(f"Processing results took {convert_results_clock_time} seconds.", file=sys.stderr, verbose=verbose)
###########################################
Expand Down
9 changes: 4 additions & 5 deletions plantcv/parallel/workflowconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class WorkflowConfig:

def __init__(self):
object.__setattr__(self, "input_dir", "")
object.__setattr__(self, "json", "")
object.__setattr__(self, "results", "")
object.__setattr__(self, "filename_metadata", [])
object.__setattr__(self, "workflow", "")
object.__setattr__(self, "img_outdir", "./output_images")
Expand Down Expand Up @@ -99,9 +99,9 @@ def validate_config(self):
print(f"Error: input directory (input_dir) is required and {self.input_dir} does not exist.",
file=sys.stderr)
checks.append(False)
# Validate JSON file
if self.json == "":
print("Error: an output JSON file (json) is required but is currently undefined.", file=sys.stderr)
# Validate JSON results file
if self.results == "":
print("Error: an output JSON file (results) is required but is currently undefined.", file=sys.stderr)
checks.append(False)
# Validate workflow script
if not os.path.exists(self.workflow):
Expand Down Expand Up @@ -308,7 +308,6 @@ def _config_attr_lookup(config, attr, val):
# for all other attributes, get their data from list
config_control = {
"input_dir": ["Images will be read from {}", str],
"json": ["output will be written to {}", str],
"filename_metadata": ["Filenames will be parsed into {}", list],
"workflow": ["Will run {} python script in each job", str],
"img_outdir": ["Output images will be written to {}", str],
Expand Down
6 changes: 3 additions & 3 deletions tests/parallel/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def test_parallel_cli_invalid_config(parallel_test_data, tmpdir):
conf_file = tmpdir.mkdir("cache").join("config.json")
config = WorkflowConfig()
# Set valid values in config
config.json = "valid_config.json"
config.results = "valid_config.json"
config.filename_metadata = ["imgtype", "camera", "frame", "zoom", "lifter", "gain", "exposure", "id"]
config.workflow = parallel_test_data.workflow_script
config.img_outdir = str(conf_file.dirpath())
Expand All @@ -43,7 +43,7 @@ def test_parallel_cli_overwriting_config(parallel_test_data, tmpdir):
conf_file = tmpdir.mkdir("cache").join("config.json")
config = WorkflowConfig()
# Set valid values in config
config.json = conf_file.strpath
config.results = conf_file.strpath
config.filename_metadata = ["imgtype", "camera", "frame", "zoom", "lifter", "gain", "exposure", "id"]
config.workflow = parallel_test_data.workflow_script
config.img_outdir = str(conf_file.dirpath())
Expand All @@ -65,7 +65,7 @@ def test_parallel_cli_valid_config(parallel_test_data, tmpdir):
config = WorkflowConfig()
# Set valid values in config
config.input_dir = parallel_test_data.flat_imgdir
config.json = conf_file.dirpath().join(os.path.basename(parallel_test_data.new_results_file)).strpath
config.results = conf_file.dirpath().join(os.path.basename(parallel_test_data.new_results_file)).strpath
config.filename_metadata = ["imgtype", "camera", "frame", "zoom", "lifter", "gain", "exposure", "id"]
config.metadata_regex = {"filepath": ".*"}
config.workflow = parallel_test_data.workflow_script
Expand Down
6 changes: 3 additions & 3 deletions tests/parallel/test_job_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def test_job_builder_single_image(parallel_test_data, tmpdir):
# Create config instance
config = WorkflowConfig()
config.input_dir = parallel_test_data.snapshot_imgdir
config.json = "output.json"
config.results = "output.json"
config.tmp_dir = str(tmp_dir)
config.filename_metadata = ["imgtype", "camera", "rotation", "zoom", "lifter", "gain", "exposure", "id"]
config.workflow = parallel_test_data.workflow_script
Expand Down Expand Up @@ -41,7 +41,7 @@ def test_job_builder_coprocess(parallel_test_data, tmpdir):
# Create config instance
config = WorkflowConfig()
config.input_dir = parallel_test_data.snapshot_imgdir
config.json = "output.json"
config.results = "output.json"
config.tmp_dir = str(tmp_dir)
config.filename_metadata = ["imgtype", "camera", "rotation", "zoom", "lifter", "gain", "exposure", "id"]
config.workflow = parallel_test_data.workflow_script
Expand Down Expand Up @@ -74,7 +74,7 @@ def test_job_builder_auto_name(parallel_test_data, tmpdir):
# Create config instance
config = WorkflowConfig()
config.input_dir = parallel_test_data.snapshot_imgdir
config.json = "output.json"
config.results = "output.json"
config.tmp_dir = str(tmp_dir)
config.filename_metadata = ["imgtype", "camera", "rotation", "zoom", "lifter", "gain", "exposure", "id"]
config.workflow = parallel_test_data.workflow_script
Expand Down
2 changes: 1 addition & 1 deletion tests/parallel/test_jupyterconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def test_jupcon_run(parallel_test_data, tmpdir):
jupcon.notebook = jupcon.find_notebook()
jupcon.input_dir = parallel_test_data.flat_imgdir
jupcon.workflow = "example.py"
jupcon.results = "example"
jupcon.results = "example.json"
jupcon.run()
assert os.path.exists(jupcon.results)

Expand Down
8 changes: 4 additions & 4 deletions tests/parallel/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def test_metadata_parser_snapshots(parallel_test_data, imgformat):
# Create config instance
config = WorkflowConfig()
config.input_dir = parallel_test_data.snapshot_imgdir
config.json = "output.json"
config.results = "output.json"
config.filename_metadata = ["imgtype", "camera", "frame", "zoom", "lifter", "gain", "exposure", "id"]
config.workflow = parallel_test_data.workflow_script
config.metadata_filters = {"imgtype": "VIS", "camera": "SV"}
Expand All @@ -37,7 +37,7 @@ def test_metadata_parser_images(parallel_test_data, subdirs, imgformat, outlengt
# Create config instance
config = WorkflowConfig()
config.input_dir = parallel_test_data.flat_imgdir
config.json = "output.json"
config.results = "output.json"
config.filename_metadata = ["imgtype", "camera", "frame", "zoom", "lifter", "gain", "exposure", "id"]
config.workflow = parallel_test_data.workflow_script
config.metadata_filters = {"imgtype": "VIS"}
Expand All @@ -57,7 +57,7 @@ def test_metadata_parser_phenodata(parallel_test_data):
# Create config instance
config = WorkflowConfig()
config.input_dir = parallel_test_data.phenodata_dir
config.json = "output.json"
config.results = "output.json"
config.workflow = parallel_test_data.workflow_script
config.imgformat = "jpg"

Expand Down Expand Up @@ -85,7 +85,7 @@ def test_read_checkpoint_data(parallel_test_data):
os.chdir(parallel_test_data.datadir)
config = WorkflowConfig()
config.input_dir = parallel_test_data.phenodata_dir
config.json = "output.json"
config.results = "output.json"
config.workflow = parallel_test_data.workflow_script
config.imgformat = "jpg"
config.checkpoint = True
Expand Down
8 changes: 4 additions & 4 deletions tests/parallel/test_process_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def test_process_results(parallel_test_data, tmpdir):
config = type("smallconfig", (),
{"tmp_dir": parallel_test_data.parallel_results_dir,
"checkpoint": False,
"json": result_file})
"results": result_file})
# Run twice to create appended results
process_results(config)
process_results(config)
Expand All @@ -28,7 +28,7 @@ def test_process_results_new_output(parallel_test_data, tmpdir):
config = type("smallconfig", (),
{"tmp_dir": parallel_test_data.parallel_results_dir,
"checkpoint": False,
"json": result_file})
"results": result_file})
process_results(config)

# Assert output matches expected values
Expand All @@ -42,7 +42,7 @@ def test_process_results_valid_json(parallel_test_data):
config = type("smallconfig", (),
{"tmp_dir": parallel_test_data.parallel_results_dir,
"checkpoint": "false",
"json": parallel_test_data.valid_json_file})
"results": parallel_test_data.valid_json_file})
# Test when the file is a valid json file but doesn't contain expected keys
with pytest.raises(RuntimeError):
process_results(config)
Expand All @@ -56,6 +56,6 @@ def test_process_results_invalid_json(tmpdir):
config = type("smallconfig", (),
{"tmp_dir": os.path.split(str(result_file))[0],
"checkpoint": "false",
"json": result_file})
"results": result_file})
with pytest.raises(RuntimeError):
process_results(config)
8 changes: 4 additions & 4 deletions tests/parallel/test_workflowconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def test_validate_config(parallel_test_data, tmpdir):
config = WorkflowConfig()
# Set valid values in config
config.input_dir = parallel_test_data.flat_imgdir
config.json = "valid_config.json"
config.results = "valid_config.json"
config.filename_metadata = ["imgtype", "camera", "frame", "zoom", "lifter", "gain", "exposure", "id"]
config.workflow = parallel_test_data.workflow_script
config.img_outdir = str(img_outdir)
Expand All @@ -59,7 +59,7 @@ def test_invalid_startdate(parallel_test_data, tmpdir):
config = WorkflowConfig()
# Set valid values in config
config.input_dir = parallel_test_data.flat_imgdir
config.json = "valid_config.json"
config.results = "valid_config.json"
config.filename_metadata = ["imgtype", "camera", "frame", "zoom", "lifter", "gain", "exposure", "id"]
config.workflow = parallel_test_data.workflow_script
config.img_outdir = str(img_outdir)
Expand All @@ -76,7 +76,7 @@ def test_invalid_enddate(parallel_test_data, tmpdir):
config = WorkflowConfig()
# Set valid values in config
config.input_dir = config.input_dir = parallel_test_data.flat_imgdir
config.json = "valid_config.json"
config.results = "valid_config.json"
config.filename_metadata = ["imgtype", "camera", "frame", "zoom", "lifter", "gain", "exposure", "id"]
config.workflow = config.workflow = parallel_test_data.workflow_script
config.img_outdir = str(img_outdir)
Expand Down Expand Up @@ -112,7 +112,7 @@ def test_too_many_cluster_config_cores(parallel_test_data):
# Create config instance
config = WorkflowConfig()
config.input_dir = config.input_dir = parallel_test_data.flat_imgdir
config.json = "valid_config.json"
config.results = "valid_config.json"
config.workflow = config.workflow = parallel_test_data.workflow_script
# Set invalid values in config
# input_dir and results are not defined by default, but are required
Expand Down
2 changes: 1 addition & 1 deletion tests/testdata/workflowconfig_template.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"input_dir": "",
"json": "",
"results": "",
"filename_metadata": [],
"include_all_subdirs": true,
"workflow": "",
Expand Down