From 470eed85ead6c5b81e45e918616b62b11a0f63a2 Mon Sep 17 00:00:00 2001 From: Paul Stretenowich Date: Fri, 14 Feb 2025 16:43:29 -0500 Subject: [PATCH 1/4] Removing lxml and using html5lib to avoid BS4 failing with lxml on HPCs --- pt_cli/connect.py | 3 +-- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pt_cli/connect.py b/pt_cli/connect.py index f1406e6..3776cbd 100755 --- a/pt_cli/connect.py +++ b/pt_cli/connect.py @@ -10,7 +10,6 @@ import requests import bs4 -import lxml logger = logging.getLogger(__name__) @@ -128,7 +127,7 @@ def maybe_json(self, data): return loads except json.decoder.JSONDecodeError: if isinstance(data, str): - soup = bs4.BeautifulSoup(data, features="lxml") + soup = bs4.BeautifulSoup(data, features="html5lib") if soup.get_text().startswith("----------"): sys.stdout.write(soup.get_text()) elif soup.get_text().startswith("Welcome"): diff --git a/pyproject.toml b/pyproject.toml index 3f7f7fd..9c270ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ dependencies = [ "pyyaml>=6.0", "requests>=2.28", "beautifulsoup4>=4.12", - "lxml", + "html5lib", "shtab", ] From cadca9df8e390e2ea8a62f18a26038ece08bc2f0 Mon Sep 17 00:00:00 2001 From: Paul Stretenowich Date: Mon, 17 Mar 2025 12:24:53 -0400 Subject: [PATCH 2/4] Adding dev rebase with main after release with Action + Git ignoring pdm* files --- .github/workflows/Sync-dev-with-main.yml | 41 ++++++++++++++++++++++++ .gitignore | 1 + 2 files changed, 42 insertions(+) create mode 100644 .github/workflows/Sync-dev-with-main.yml diff --git a/.github/workflows/Sync-dev-with-main.yml b/.github/workflows/Sync-dev-with-main.yml new file mode 100644 index 0000000..c9cfce9 --- /dev/null +++ b/.github/workflows/Sync-dev-with-main.yml @@ -0,0 +1,41 @@ +name: Sync dev with main + +on: + workflow_run: + workflows: ["Tag and Release"] + types: + - completed + +jobs: + rebase: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Git + run: | + git config user.email "github-actions[bot]@users.noreply.github.com" + git config user.name "github-actions[bot]" + + - name: Fetch all branches + run: git fetch --all + + - name: Rebase dev with main + run: | + git checkout dev + git rebase origin/main + + - name: Update the version to .dev + id: update_version + run: | + version_file="genpipes/__version__.py" + version_number=$(grep -oP '(?<=__version__ = ")[^"]+' $version_file) + echo "__version__ = '${version_number}.dev'" > $version_file + + - name: Push changes + run: | + git push --force-with-lease origin dev diff --git a/.gitignore b/.gitignore index 898f0b6..ae221bb 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ venv __pycache__/ .DS_Store .pdm* +pdm* dist/ # Tests From ed6996f71693d092b1f950ac44a97e036f1d7a5a Mon Sep 17 00:00:00 2001 From: Paul Stretenowich Date: Tue, 29 Apr 2025 17:21:59 -0400 Subject: [PATCH 3/4] Adding getid method --- pt_cli/cli.py | 7 +++- pt_cli/tools.py | 95 +++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 95 insertions(+), 7 deletions(-) diff --git a/pt_cli/cli.py b/pt_cli/cli.py index f0667ef..11d9ea1 100755 --- a/pt_cli/cli.py +++ b/pt_cli/cli.py @@ -25,7 +25,9 @@ UnDelete, Deprecate, UnDeprecate, - Curate + Curate, + GetID, + Location, ) from .__version__ import __version__ @@ -170,6 +172,9 @@ def projects(parsed_local): UnDeprecate(connection_obj=connector_session, subparser=subparser) Curate(connection_obj=connector_session, subparser=subparser) + getid_subparser = GetID(subparser).subparser + Location(connection_obj=connector_session, subparser=getid_subparser) + shtab.add_argument_to(parser, ["-s", "--print-completion"]) diff --git a/pt_cli/tools.py b/pt_cli/tools.py index 20e61a9..c01ca53 100644 --- a/pt_cli/tools.py +++ b/pt_cli/tools.py @@ -465,7 +465,7 @@ def json_to_unanalyzed(self): unanalyzed = self.unanalyzed if not self.output_file: if isinstance(unanalyzed, str): - soup = bs4.BeautifulSoup(unanalyzed, features="lxml") + soup = bs4.BeautifulSoup(unanalyzed, features="html5lib") return sys.stdout.write(soup.get_text()) # else case, not explicitely written return sys.stdout.write(json.dumps(unanalyzed)) @@ -489,6 +489,20 @@ def func(self, parsed_args): self.output_file = parsed_args.output self.json_to_unanalyzed() +class Undelivered(AddCMD): + """ + Undelivered is a sub-command of Digest subparser using base AddCMD class + """ + __tool_name__ = 'undelivered' + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.parsed_input = None + self.output_file = None + + def help(self): + return "Will return undelivered Samples name/ID or Readsets name/ID" + + class Delivery(AddCMD): """ Delivery is a sub-command of Digest subparser using base AddCMD class @@ -509,10 +523,9 @@ def arguments(self): self.parser.add_argument('--specimen_id', help='Specimen ID to be selected', nargs='+') self.parser.add_argument('--sample_id', help='Sample ID to be selected', nargs='+') self.parser.add_argument('--readset_id', help='Readset ID to be selected', nargs='+') - self.parser.add_argument('--experiment_nucleic_acid_type', help="Experiment nucleic_acid_type characterizing the Samples/Readsets (RNA or DNA)", required=False) + self.parser.add_argument('--experiment_nucleic_acid_type', help="Experiment nucleic_acid_type characterizing the Samples/Readsets (RNA or DNA)", required=True) self.parser.add_argument('--endpoint', help="Endpoint in which data is located", required=True) self.parser.add_argument('--output', '-o', help="Name of output file (Default: terminal), formatted as Json file with sample/readset and endpoint") - # self.parser.add_argument('--input-json', help="Json file with all parameters") @property def delivery(self): @@ -564,14 +577,14 @@ def json_to_delivery(self): delivery = self.delivery if not self.output_file: if isinstance(delivery, str): - soup = bs4.BeautifulSoup(delivery, features="lxml") + soup = bs4.BeautifulSoup(delivery, features="html5lib") return sys.stdout.write(soup.get_text()) # else case, not explicitely written - return sys.stdout.write(json.dumps(delivery)) + return sys.stdout.write(json.dumps(delivery["DB_ACTION_OUTPUT"])) if not delivery: raise EmptyGetError with open(self.output_file, "w", encoding="utf-8") as out_pair_file: - json.dump(delivery, out_pair_file, ensure_ascii=False, indent=4) + json.dump(delivery["DB_ACTION_OUTPUT"], out_pair_file, ensure_ascii=False, indent=4) logger.info(f"Delivery file written to {self.output_file}") def func(self, parsed_args): @@ -950,3 +963,73 @@ def func(self, parsed_args): pass else: sys.stdout.write("\n".join(response["DB_ACTION_OUTPUT"])) + +class GetID: + """ + GetID is a subparser of the client in which you can query per table entries to get their IDs + """ + __tool_name__ = 'getid' + + def __init__(self, subparser=argparse.ArgumentParser().add_subparsers()): + self.subparser = subparser.add_parser(self.__tool_name__, help=self.help(), add_help=True).add_subparsers() + + def help(self): + """ + :return: the tool help string + """ + return f"All {self.__tool_name__} sub commands, those encapsulate all tables from the database to be queried and get the ID. Use 'pt_cli {self.__tool_name__} --help' to see more details." + + +class Location(AddCMD): + """ + Location is a sub-command of GetID subparser using base AddCMD class + """ + __tool_name__ = 'location' + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.parsed_input = None + self.output_file = None + + def help(self): + return "Will return location ID based on location endpoint and file name" + + def arguments(self): + self.parser.add_argument('--endpoint', help='Endpoint in which data is located', required=True) + self.parser.add_argument('--file_name', help='File Name linked to the location', required=True) + + @property + def get_location(self): + ''' + Returns a list of location IDs of GenPipes of the API call for get_location + :return: + ''' + return self.post('project/get_location', data=self.parsed_input) + + def jsonify_input(self, parsed_args): + ''' + :return: jsonified input args + ''' + json = { + "location_endpoint": parsed_args.endpoint, + "file_name": parsed_args.file_name + } + + return json + + + def func(self, parsed_args): + super().func(parsed_args) + # Dev case when using --data-file + self.parsed_input = self.data() + + # When --data-file is empty + if not self.parsed_input: + self.parsed_input = json.dumps(self.jsonify_input(parsed_args), ensure_ascii=False, indent=4) + if not self.parsed_input: + raise BadArgumentError + + get_location = self.get_location + if isinstance(get_location, str): + soup = bs4.BeautifulSoup(get_location, features="html5lib") + return sys.stdout.write(soup.get_text()) + return sys.stdout.write(''.join(get_location["DB_ACTION_OUTPUT"])) From ccbd2c0e8ef8f3195a183534a11da5930b5785de Mon Sep 17 00:00:00 2001 From: Paul Stretenowich Date: Tue, 29 Apr 2025 17:27:11 -0400 Subject: [PATCH 4/4] Consolidating GH Action --- .github/workflows/Sync-dev-with-main.yml | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/.github/workflows/Sync-dev-with-main.yml b/.github/workflows/Sync-dev-with-main.yml index c9cfce9..84e717b 100644 --- a/.github/workflows/Sync-dev-with-main.yml +++ b/.github/workflows/Sync-dev-with-main.yml @@ -1,20 +1,23 @@ -name: Sync dev with main +name: Rebase main and dev and add .dev to version on: workflow_run: workflows: ["Tag and Release"] types: - completed + workflow_dispatch: jobs: rebase: runs-on: ubuntu-latest - + if: ${{ github.event.workflow_run.conclusion == 'success' || github.event_name == 'workflow_dispatch' }} steps: - name: Checkout code uses: actions/checkout@v4 with: + ref: ${{ github.head_ref }} fetch-depth: 0 + token: ${{ secrets.BOT_ACCESS_TOKEN }} - name: Setup Git run: | @@ -33,9 +36,13 @@ jobs: id: update_version run: | version_file="genpipes/__version__.py" - version_number=$(grep -oP '(?<=__version__ = ")[^"]+' $version_file) - echo "__version__ = '${version_number}.dev'" > $version_file + version_number=$(sed -n "s/__version__ = '\([^']*\)'/\1/p" $version_file) + if [[ "$version_number" != *.dev ]]; then + echo "__version__ = '${version_number}.dev'" > $version_file + fi - - name: Push changes + - name: Commit changes run: | - git push --force-with-lease origin dev + git add genpipes/__version__.py + git commit -m "Dev Version update" + git push --force-with-lease