diff --git a/config.py b/config.py index 40c004b31..f5f232c92 100644 --- a/config.py +++ b/config.py @@ -1,6 +1,6 @@ # In this file, you can set the configurations of the app. -from constants import DEBUG, LLM_MODEL, OPENAI +from src.utils.constants import DEBUG, LLM_MODEL, OPENAI #config related to logging must have prefix LOG_ LOG_LEVEL = DEBUG diff --git a/docs/LICENSE b/docs/LICENSE deleted file mode 100644 index 12429f4b9..000000000 --- a/docs/LICENSE +++ /dev/null @@ -1,397 +0,0 @@ -Copyright (C) 2024 AI Hawk FOSS - -Attribution 4.0 International - -======================================================================= - -Creative Commons Corporation ("Creative Commons") is not a law firm and -does not provide legal services or legal advice. Distribution of -Creative Commons public licenses does not create a lawyer-client or -other relationship. Creative Commons makes its licenses and related -information available on an "as-is" basis. Creative Commons gives no -warranties regarding its licenses, any material licensed under their -terms and conditions, or any related information. Creative Commons -disclaims all liability for damages resulting from their use to the -fullest extent possible. - -Using Creative Commons Public Licenses - -Creative Commons public licenses provide a standard set of terms and -conditions that creators and other rights holders may use to share -original works of authorship and other material subject to copyright -and certain other rights specified in the public license below. The -following considerations are for informational purposes only, are not -exhaustive, and do not form part of our licenses. - - Considerations for licensors: Our public licenses are - intended for use by those authorized to give the public - permission to use material in ways otherwise restricted by - copyright and certain other rights. Our licenses are - irrevocable. Licensors should read and understand the terms - and conditions of the license they choose before applying it. - Licensors should also secure all rights necessary before - applying our licenses so that the public can reuse the - material as expected. Licensors should clearly mark any - material not subject to the license. This includes other CC- - licensed material, or material used under an exception or - limitation to copyright. More considerations for licensors: - wiki.creativecommons.org/Considerations_for_licensors - - Considerations for the public: By using one of our public - licenses, a licensor grants the public permission to use the - licensed material under specified terms and conditions. If - the licensor's permission is not necessary for any reason--for - example, because of any applicable exception or limitation to - copyright--then that use is not regulated by the license. Our - licenses grant only permissions under copyright and certain - other rights that a licensor has authority to grant. Use of - the licensed material may still be restricted for other - reasons, including because others have copyright or other - rights in the material. A licensor may make special requests, - such as asking that all changes be marked or described. - Although not required by our licenses, you are encouraged to - respect those requests where reasonable. More_considerations - for the public: - wiki.creativecommons.org/Considerations_for_licensees - -======================================================================= - -Creative Commons Attribution 4.0 International Public License - -By exercising the Licensed Rights (defined below), You accept and agree -to be bound by the terms and conditions of this Creative Commons -Attribution 4.0 International Public License ("Public License"). To the -extent this Public License may be interpreted as a contract, You are -granted the Licensed Rights in consideration of Your acceptance of -these terms and conditions, and the Licensor grants You such rights in -consideration of benefits the Licensor receives from making the -Licensed Material available under these terms and conditions. - - -Section 1 -- Definitions. - - a. Adapted Material means material subject to Copyright and Similar - Rights that is derived from or based upon the Licensed Material - and in which the Licensed Material is translated, altered, - arranged, transformed, or otherwise modified in a manner requiring - permission under the Copyright and Similar Rights held by the - Licensor. For purposes of this Public License, where the Licensed - Material is a musical work, performance, or sound recording, - Adapted Material is always produced where the Licensed Material is - synched in timed relation with a moving image. - - b. Adapter's License means the license You apply to Your Copyright - and Similar Rights in Your contributions to Adapted Material in - accordance with the terms and conditions of this Public License. - - c. Copyright and Similar Rights means copyright and/or similar rights - closely related to copyright including, without limitation, - performance, broadcast, sound recording, and Sui Generis Database - Rights, without regard to how the rights are labeled or - categorized. For purposes of this Public License, the rights - specified in Section 2(b)(1)-(2) are not Copyright and Similar - Rights. - - d. Effective Technological Measures means those measures that, in the - absence of proper authority, may not be circumvented under laws - fulfilling obligations under Article 11 of the WIPO Copyright - Treaty adopted on December 20, 1996, and/or similar international - agreements. - - e. Exceptions and Limitations means fair use, fair dealing, and/or - any other exception or limitation to Copyright and Similar Rights - that applies to Your use of the Licensed Material. - - f. Licensed Material means the artistic or literary work, database, - or other material to which the Licensor applied this Public - License. - - g. Licensed Rights means the rights granted to You subject to the - terms and conditions of this Public License, which are limited to - all Copyright and Similar Rights that apply to Your use of the - Licensed Material and that the Licensor has authority to license. - - h. Licensor means the individual(s) or entity(ies) granting rights - under this Public License. - - i. Share means to provide material to the public by any means or - process that requires permission under the Licensed Rights, such - as reproduction, public display, public performance, distribution, - dissemination, communication, or importation, and to make material - available to the public including in ways that members of the - public may access the material from a place and at a time - individually chosen by them. - - j. Sui Generis Database Rights means rights other than copyright - resulting from Directive 96/9/EC of the European Parliament and of - the Council of 11 March 1996 on the legal protection of databases, - as amended and/or succeeded, as well as other essentially - equivalent rights anywhere in the world. - - k. You means the individual or entity exercising the Licensed Rights - under this Public License. Your has a corresponding meaning. - - -Section 2 -- Scope. - - a. License grant. - - 1. Subject to the terms and conditions of this Public License, - the Licensor hereby grants You a worldwide, royalty-free, - non-sublicensable, non-exclusive, irrevocable license to - exercise the Licensed Rights in the Licensed Material to: - - a. reproduce and Share the Licensed Material, in whole or - in part; and - - b. produce, reproduce, and Share Adapted Material. - - 2. Exceptions and Limitations. For the avoidance of doubt, where - Exceptions and Limitations apply to Your use, this Public - License does not apply, and You do not need to comply with - its terms and conditions. - - 3. Term. The term of this Public License is specified in Section - 6(a). - - 4. Media and formats; technical modifications allowed. The - Licensor authorizes You to exercise the Licensed Rights in - all media and formats whether now known or hereafter created, - and to make technical modifications necessary to do so. The - Licensor waives and/or agrees not to assert any right or - authority to forbid You from making technical modifications - necessary to exercise the Licensed Rights, including - technical modifications necessary to circumvent Effective - Technological Measures. For purposes of this Public License, - simply making modifications authorized by this Section 2(a) - (4) never produces Adapted Material. - - 5. Downstream recipients. - - a. Offer from the Licensor -- Licensed Material. Every - recipient of the Licensed Material automatically - receives an offer from the Licensor to exercise the - Licensed Rights under the terms and conditions of this - Public License. - - b. No downstream restrictions. You may not offer or impose - any additional or different terms or conditions on, or - apply any Effective Technological Measures to, the - Licensed Material if doing so restricts exercise of the - Licensed Rights by any recipient of the Licensed - Material. - - 6. No endorsement. Nothing in this Public License constitutes or - may be construed as permission to assert or imply that You - are, or that Your use of the Licensed Material is, connected - with, or sponsored, endorsed, or granted official status by, - the Licensor or others designated to receive attribution as - provided in Section 3(a)(1)(A)(i). - - b. Other rights. - - 1. Moral rights, such as the right of integrity, are not - licensed under this Public License, nor are publicity, - privacy, and/or other similar personality rights; however, to - the extent possible, the Licensor waives and/or agrees not to - assert any such rights held by the Licensor to the limited - extent necessary to allow You to exercise the Licensed - Rights, but not otherwise. - - 2. Patent and trademark rights are not licensed under this - Public License. - - 3. To the extent possible, the Licensor waives any right to - collect royalties from You for the exercise of the Licensed - Rights, whether directly or through a collecting society - under any voluntary or waivable statutory or compulsory - licensing scheme. In all other cases the Licensor expressly - reserves any right to collect such royalties. - - -Section 3 -- License Conditions. - -Your exercise of the Licensed Rights is expressly made subject to the -following conditions. - - a. Attribution. - - 1. If You Share the Licensed Material (including in modified - form), You must: - - a. retain the following if it is supplied by the Licensor - with the Licensed Material: - - i. identification of the creator(s) of the Licensed - Material and any others designated to receive - attribution, in any reasonable manner requested by - the Licensor (including by pseudonym if - designated); - - ii. a copyright notice; - - iii. a notice that refers to this Public License; - - iv. a notice that refers to the disclaimer of - warranties; - - v. a URI or hyperlink to the Licensed Material to the - extent reasonably practicable; - - b. indicate if You modified the Licensed Material and - retain an indication of any previous modifications; and - - c. indicate the Licensed Material is licensed under this - Public License, and include the text of, or the URI or - hyperlink to, this Public License. - - 2. You may satisfy the conditions in Section 3(a)(1) in any - reasonable manner based on the medium, means, and context in - which You Share the Licensed Material. For example, it may be - reasonable to satisfy the conditions by providing a URI or - hyperlink to a resource that includes the required - information. - - 3. If requested by the Licensor, You must remove any of the - information required by Section 3(a)(1)(A) to the extent - reasonably practicable. - - 4. If You Share Adapted Material You produce, the Adapter's - License You apply must not prevent recipients of the Adapted - Material from complying with this Public License. - - -Section 4 -- Sui Generis Database Rights. - -Where the Licensed Rights include Sui Generis Database Rights that -apply to Your use of the Licensed Material: - - a. for the avoidance of doubt, Section 2(a)(1) grants You the right - to extract, reuse, reproduce, and Share all or a substantial - portion of the contents of the database; - - b. if You include all or a substantial portion of the database - contents in a database in which You have Sui Generis Database - Rights, then the database in which You have Sui Generis Database - Rights (but not its individual contents) is Adapted Material; and - - c. You must comply with the conditions in Section 3(a) if You Share - all or a substantial portion of the contents of the database. - -For the avoidance of doubt, this Section 4 supplements and does not -replace Your obligations under this Public License where the Licensed -Rights include other Copyright and Similar Rights. - - -Section 5 -- Disclaimer of Warranties and Limitation of Liability. - - a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE - EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS - AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF - ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, - IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, - WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR - PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, - ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT - KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT - ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. - - b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE - TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, - NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, - INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, - COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR - USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN - ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR - DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR - IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. - - c. The disclaimer of warranties and limitation of liability provided - above shall be interpreted in a manner that, to the extent - possible, most closely approximates an absolute disclaimer and - waiver of all liability. - - -Section 6 -- Term and Termination. - - a. This Public License applies for the term of the Copyright and - Similar Rights licensed here. However, if You fail to comply with - this Public License, then Your rights under this Public License - terminate automatically. - - b. Where Your right to use the Licensed Material has terminated under - Section 6(a), it reinstates: - - 1. automatically as of the date the violation is cured, provided - it is cured within 30 days of Your discovery of the - violation; or - - 2. upon express reinstatement by the Licensor. - - For the avoidance of doubt, this Section 6(b) does not affect any - right the Licensor may have to seek remedies for Your violations - of this Public License. - - c. For the avoidance of doubt, the Licensor may also offer the - Licensed Material under separate terms or conditions or stop - distributing the Licensed Material at any time; however, doing so - will not terminate this Public License. - - d. Sections 1, 5, 6, 7, and 8 survive termination of this Public - License. - - -Section 7 -- Other Terms and Conditions. - - a. The Licensor shall not be bound by any additional or different - terms or conditions communicated by You unless expressly agreed. - - b. Any arrangements, understandings, or agreements regarding the - Licensed Material not stated herein are separate from and - independent of the terms and conditions of this Public License. - - -Section 8 -- Interpretation. - - a. For the avoidance of doubt, this Public License does not, and - shall not be interpreted to, reduce, limit, restrict, or impose - conditions on any use of the Licensed Material that could lawfully - be made without permission under this Public License. - - b. To the extent possible, if any provision of this Public License is - deemed unenforceable, it shall be automatically reformed to the - minimum extent necessary to make it enforceable. If the provision - cannot be reformed, it shall be severed from this Public License - without affecting the enforceability of the remaining terms and - conditions. - - c. No term or condition of this Public License will be waived and no - failure to comply consented to unless expressly agreed to by the - Licensor. - - d. Nothing in this Public License constitutes or may be interpreted - as a limitation upon, or waiver of, any privileges and immunities - that apply to the Licensor or You, including from the legal - processes of any jurisdiction or authority. - - -======================================================================= - -Creative Commons is not a party to its public -licenses. Notwithstanding, Creative Commons may elect to apply one of -its public licenses to material it publishes and in those instances -will be considered the “Licensor.” The text of the Creative Commons -public licenses is dedicated to the public domain under the CC0 Public -Domain Dedication. Except for the limited purpose of indicating that -material is shared under a Creative Commons public license or as -otherwise permitted by the Creative Commons policies published at -creativecommons.org/policies, Creative Commons does not authorize the -use of the trademark "Creative Commons" or any other trademark or logo -of Creative Commons without its prior written consent including, -without limitation, in connection with any unauthorized modifications -to any of its public licenses or any other arrangements, -understandings, or agreements concerning use of licensed material. For -the avoidance of doubt, this paragraph does not form part of the -public licenses. - -Creative Commons may be contacted at creativecommons.org. \ No newline at end of file diff --git a/docs/development_diagrams.md b/docs/development_diagrams.md deleted file mode 100644 index 12c6795db..000000000 --- a/docs/development_diagrams.md +++ /dev/null @@ -1,137 +0,0 @@ -# AIHawk Development Diagrams - -## JobApplicationProfile class - -```mermaid -classDiagram - JobApplicationProfile *-- SelfIdentification - JobApplicationProfile *-- LegalAuthorization - JobApplicationProfile *-- WorkPreferences - JobApplicationProfile *-- Availability - JobApplicationProfile *-- SalaryExpectations - - class JobApplicationProfile { - +SelfIdentification self_identification - +LegalAuthorization legal_authorization - +WorkPreferences work_preferences - +Availability availability - +SalaryExpectations salary_expectations - +__init__(yaml_str) - +__str__() - } - - class SelfIdentification { - +str gender - +str pronouns - +str veteran - +str disability - +str ethnicity - } - - class LegalAuthorization { - +str eu_work_authorization - +str us_work_authorization - +str requires_us_visa - +str legally_allowed_to_work_in_us - +str requires_us_sponsorship - +str requires_eu_visa - +str legally_allowed_to_work_in_eu - +str requires_eu_sponsorship - +str canada_work_authorization - +str requires_canada_visa - +str legally_allowed_to_work_in_canada - +str requires_canada_sponsorship - +str uk_work_authorization - +str requires_uk_visa - +str legally_allowed_to_work_in_uk - +str requires_uk_sponsorship - } - - class WorkPreferences { - +str remote_work - +str in_person_work - +str open_to_relocation - +str willing_to_complete_assessments - +str willing_to_undergo_drug_tests - +str willing_to_undergo_background_checks - } - - class Availability { - +str notice_period - } - - class SalaryExpectations { - +str salary_range_usd - } -``` - -## Job application process - -```mermaid -sequenceDiagram - participant Main - participant AIHawkEasyApplier - participant JobManager - participant GPTAnswerer - participant Browser - participant FileSystem - - Main->>AIHawkEasyApplier: apply_to_job(job) - activate AIHawkEasyApplier - - AIHawkEasyApplier->>AIHawkEasyApplier: job_apply(job) - AIHawkEasyApplier->>Browser: Navigate to job.link - - AIHawkEasyApplier->>AIHawkEasyApplier: check_for_premium_redirect(job) - - AIHawkEasyApplier->>Browser: Find Easy Apply button - AIHawkEasyApplier->>Browser: Get job description - AIHawkEasyApplier->>Browser: Get recruiter link - - AIHawkEasyApplier->>GPTAnswerer: set_job(job) - AIHawkEasyApplier->>GPTAnswerer: is_job_suitable() - - alt Job Not Suitable - GPTAnswerer-->>AIHawkEasyApplier: False - AIHawkEasyApplier->>JobManager: write_to_file(job, "skipped") - AIHawkEasyApplier-->>Main: Return - end - - AIHawkEasyApplier->>Browser: Click Easy Apply button - - AIHawkEasyApplier->>AIHawkEasyApplier: _fill_application_form(job) - - loop Until Form Complete - AIHawkEasyApplier->>AIHawkEasyApplier: fill_up(job) - - alt Upload Fields Found - AIHawkEasyApplier->>AIHawkEasyApplier: _create_and_upload_resume() - AIHawkEasyApplier->>FileSystem: Save resume PDF - AIHawkEasyApplier->>Browser: Upload resume - - AIHawkEasyApplier->>AIHawkEasyApplier: _create_and_upload_cover_letter() - AIHawkEasyApplier->>GPTAnswerer: Generate cover letter - AIHawkEasyApplier->>Browser: Upload cover letter - end - - alt Additional Questions Found - AIHawkEasyApplier->>AIHawkEasyApplier: _fill_additional_questions() - AIHawkEasyApplier->>FileSystem: Load answers.json - AIHawkEasyApplier->>GPTAnswerer: Generate new answers - AIHawkEasyApplier->>FileSystem: Save to answers.json - AIHawkEasyApplier->>Browser: Fill in answers - end - - AIHawkEasyApplier->>AIHawkEasyApplier: _next_or_submit() - AIHawkEasyApplier->>AIHawkEasyApplier: _check_for_errors() - end - - alt Application Successful - AIHawkEasyApplier->>JobManager: write_to_file(job, "success") - else Application Failed - AIHawkEasyApplier->>AIHawkEasyApplier: _discard_application() - AIHawkEasyApplier->>JobManager: write_to_file(job, "failed") - end - - deactivate AIHawkEasyApplier -``` diff --git a/docs/guide_to_autostart_aihawk.pdf b/docs/guide_to_autostart_aihawk.pdf deleted file mode 100644 index 5f06ec63b..000000000 Binary files a/docs/guide_to_autostart_aihawk.pdf and /dev/null differ diff --git a/docs/guide_to_setup_ollama_and_gemini.pdf b/docs/guide_to_setup_ollama_and_gemini.pdf deleted file mode 100644 index adf728ebb..000000000 Binary files a/docs/guide_to_setup_ollama_and_gemini.pdf and /dev/null differ diff --git a/docs/guide_yaml_sections.pdf b/docs/guide_yaml_sections.pdf deleted file mode 100644 index f772d7ab8..000000000 Binary files a/docs/guide_yaml_sections.pdf and /dev/null differ diff --git a/docs/workflow_diagrams.md b/docs/workflow_diagrams.md deleted file mode 100644 index 37bca33f2..000000000 --- a/docs/workflow_diagrams.md +++ /dev/null @@ -1,72 +0,0 @@ -# Dev diagrams - -Note: All diagrams are created using [Mermaid](https://mermaid.js.org/). - -## 1. Application flow - -```mermaid -graph TD - A[Start] --> B[Parse Command Line Arguments] - B --> C[Validate Data Folder] - C --> D[Load Configuration] - D --> E[Initialize Components] - E --> F{Collect Mode?} - F -->|Yes| G[Collect Job Data] - F -->|No| H[Start Job Application Process] - G --> I[Save Data to JSON] - H --> J[Login to AIHawk] - J --> K[Search for Jobs] - K --> L[Apply to Jobs] - L --> M[Generate Reports] - I --> N[End] - M --> N -``` - -## 2. Job application process - -```mermaid -sequenceDiagram - participant User - participant AIHawkBot - participant AIHawk - participant GPTAnswerer - participant ResumeGenerator - - User->>AIHawkBot: Start application process - AIHawkBot->>AIHawk: Login - AIHawkBot->>AIHawk: Search for jobs - loop For each job - AIHawkBot->>AIHawk: Open job listing - AIHawkBot->>GPTAnswerer: Generate answers for application questions - AIHawkBot->>ResumeGenerator: Generate tailored resume - AIHawkBot->>AIHawk: Fill application form - AIHawkBot->>AIHawk: Upload resume and cover letter - AIHawkBot->>AIHawk: Submit application - AIHawkBot->>AIHawkBot: Log application result - end - AIHawkBot->>User: Display application summary -``` - -## 3. Resume generation process - -```mermaid -graph TD - A[Start Resume Generation] --> B[Extract Job Description] - B --> C[Analyze Job Requirements] - C --> D[Retrieve User Profile] - D --> E[Generate Tailored Content] - E --> F[Create PDF Resume] - F --> G[Return Base64 Encoded PDF] - G --> H[End Resume Generation] -``` - -## 4. GPTAnswerer workflow - -```mermaid -graph LR - A[Receive Question] --> B[Prepare Prompt] - B --> C[Send to LLM Model] - C --> D[Receive Response] - D --> E[Parse Response] - E --> F[Return Formatted Answer] -``` diff --git a/main.py b/main.py index f9f05771f..534b16bcb 100644 --- a/main.py +++ b/main.py @@ -1,207 +1,204 @@ -import os -import re +import base64 import sys from pathlib import Path -import trace import traceback -import yaml +from typing import List, Optional, Tuple, Dict + import click +import inquirer +import yaml from selenium import webdriver +from selenium.common.exceptions import WebDriverException from selenium.webdriver.chrome.service import Service as ChromeService from webdriver_manager.chrome import ChromeDriverManager -from selenium.common.exceptions import WebDriverException -from lib_resume_builder_AIHawk import ( - Resume, - FacadeManager, - ResumeGenerator, - StyleManager, -) -from typing import Optional -from constants import LINKEDIN, PLAIN_TEXT_RESUME_YAML, SECRETS_YAML, WORK_PREFERENCES_YAML -from src.job_portals.base_job_portal import get_job_portal -from src.utils.chrome_utils import chrome_browser_options -import undetected_chromedriver as uc - -from src.job_application_profile import JobApplicationProfile +import re +from src.ai_hawk.libs.resume_and_cover_builder import ResumeFacade, ResumeGenerator, StyleManager +from src.ai_hawk.resume_schemas.job_application_profile import JobApplicationProfile +from src.ai_hawk.resume_schemas.resume import Resume from src.logging import logger - -# Suppress stderr only during specific operations -original_stderr = sys.stderr - -# Add the src directory to the Python path -sys.path.append(str(Path(__file__).resolve().parent / "src")) - - -from ai_hawk.bot_facade import AIHawkBotFacade -from ai_hawk.job_manager import AIHawkJobManager -from ai_hawk.llm.llm_manager import GPTAnswerer +from src.utils.chrome_utils import init_browser +from src.utils.constants import ( + PLAIN_TEXT_RESUME_YAML, + SECRETS_YAML, + WORK_PREFERENCES_YAML, +) +# from ai_hawk.bot_facade import AIHawkBotFacade +# from ai_hawk.job_manager import AIHawkJobManager +# from ai_hawk.llm.llm_manager import GPTAnswerer class ConfigError(Exception): + """Custom exception for configuration-related errors.""" pass class ConfigValidator: + """Validates configuration and secrets YAML files.""" + + EMAIL_REGEX = re.compile(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$") + REQUIRED_CONFIG_KEYS = { + "remote": bool, + "experience_level": dict, + "job_types": dict, + "date": dict, + "positions": list, + "locations": list, + "location_blacklist": list, + "distance": int, + "company_blacklist": list, + "title_blacklist": list, + } + EXPERIENCE_LEVELS = [ + "internship", + "entry", + "associate", + "mid_senior_level", + "director", + "executive", + ] + JOB_TYPES = [ + "full_time", + "contract", + "part_time", + "temporary", + "internship", + "other", + "volunteer", + ] + DATE_FILTERS = ["all_time", "month", "week", "24_hours"] + APPROVED_DISTANCES = {0, 5, 10, 25, 50, 100} + @staticmethod def validate_email(email: str) -> bool: - return ( - re.match(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$", email) - is not None - ) + """Validate the format of an email address.""" + return bool(ConfigValidator.EMAIL_REGEX.match(email)) @staticmethod - def validate_yaml_file(yaml_path: Path) -> dict: + def load_yaml(yaml_path: Path) -> dict: + """Load and parse a YAML file.""" try: with open(yaml_path, "r") as stream: return yaml.safe_load(stream) except yaml.YAMLError as exc: - raise ConfigError(f"Error reading file {yaml_path}: {exc}") + raise ConfigError(f"Error reading YAML file {yaml_path}: {exc}") except FileNotFoundError: - raise ConfigError(f"File not found: {yaml_path}") - - @staticmethod - def validate_config(config_yaml_path: Path) -> dict: - parameters = ConfigValidator.validate_yaml_file(config_yaml_path) - required_keys = { - "remote": bool, - "experience_level": dict, - "job_types": dict, - "date": dict, - "positions": list, - "locations": list, - "location_blacklist": list, - "distance": int, - "company_blacklist": list, - "title_blacklist": list, - } - - for key, expected_type in required_keys.items(): + raise ConfigError(f"YAML file not found: {yaml_path}") + + @classmethod + def validate_config(cls, config_yaml_path: Path) -> dict: + """Validate the main configuration YAML file.""" + parameters = cls.load_yaml(config_yaml_path) + # Check for required keys and their types + for key, expected_type in cls.REQUIRED_CONFIG_KEYS.items(): if key not in parameters: - if key in [ - "company_blacklist", - "title_blacklist", - "location_blacklist", - ]: + if key in ["company_blacklist", "title_blacklist", "location_blacklist"]: parameters[key] = [] else: - raise ConfigError( - f"Missing or invalid key '{key}' in config file {config_yaml_path}" - ) + raise ConfigError(f"Missing required key '{key}' in {config_yaml_path}") elif not isinstance(parameters[key], expected_type): - if ( - key - in ["company_blacklist", "title_blacklist", "location_blacklist"] - and parameters[key] is None - ): + if key in ["company_blacklist", "title_blacklist", "location_blacklist"] and parameters[key] is None: parameters[key] = [] else: raise ConfigError( - f"Invalid type for key '{key}' in config file {config_yaml_path}. Expected {expected_type}." + f"Invalid type for key '{key}' in {config_yaml_path}. Expected {expected_type.__name__}." ) + cls._validate_experience_levels(parameters["experience_level"], config_yaml_path) + cls._validate_job_types(parameters["job_types"], config_yaml_path) + cls._validate_date_filters(parameters["date"], config_yaml_path) + cls._validate_list_of_strings(parameters, ["positions", "locations"], config_yaml_path) + cls._validate_distance(parameters["distance"], config_yaml_path) + cls._validate_blacklists(parameters, config_yaml_path) + return parameters - # Validate experience levels, ensure they are boolean - experience_levels = [ - "internship", - "entry", - "associate", - "mid_senior_level", - "director", - "executive", - ] - for level in experience_levels: - if not isinstance(parameters["experience_level"].get(level), bool): + @classmethod + def _validate_experience_levels(cls, experience_levels: dict, config_path: Path): + """Ensure experience levels are booleans.""" + for level in cls.EXPERIENCE_LEVELS: + if not isinstance(experience_levels.get(level), bool): raise ConfigError( - f"Experience level '{level}' must be a boolean in config file {config_yaml_path}" + f"Experience level '{level}' must be a boolean in {config_path}" ) - # Validate job types, ensure they are boolean - job_types = [ - "full_time", - "contract", - "part_time", - "temporary", - "internship", - "other", - "volunteer", - ] - for job_type in job_types: - if not isinstance(parameters["job_types"].get(job_type), bool): + @classmethod + def _validate_job_types(cls, job_types: dict, config_path: Path): + """Ensure job types are booleans.""" + for job_type in cls.JOB_TYPES: + if not isinstance(job_types.get(job_type), bool): raise ConfigError( - f"Job type '{job_type}' must be a boolean in config file {config_yaml_path}" + f"Job type '{job_type}' must be a boolean in {config_path}" ) - # Validate date filters - date_filters = ["all_time", "month", "week", "24_hours"] - for date_filter in date_filters: - if not isinstance(parameters["date"].get(date_filter), bool): + @classmethod + def _validate_date_filters(cls, date_filters: dict, config_path: Path): + """Ensure date filters are booleans.""" + for date_filter in cls.DATE_FILTERS: + if not isinstance(date_filters.get(date_filter), bool): raise ConfigError( - f"Date filter '{date_filter}' must be a boolean in config file {config_yaml_path}" + f"Date filter '{date_filter}' must be a boolean in {config_path}" ) - # Validate positions and locations as lists of strings - if not all(isinstance(pos, str) for pos in parameters["positions"]): - raise ConfigError( - f"'positions' must be a list of strings in config file {config_yaml_path}" - ) - if not all(isinstance(loc, str) for loc in parameters["locations"]): - raise ConfigError( - f"'locations' must be a list of strings in config file {config_yaml_path}" - ) + @classmethod + def _validate_list_of_strings(cls, parameters: dict, keys: list, config_path: Path): + """Ensure specified keys are lists of strings.""" + for key in keys: + if not all(isinstance(item, str) for item in parameters[key]): + raise ConfigError( + f"'{key}' must be a list of strings in {config_path}" + ) - # Validate distance - approved_distances = {0, 5, 10, 25, 50, 100} - if parameters["distance"] not in approved_distances: + @classmethod + def _validate_distance(cls, distance: int, config_path: Path): + """Validate the distance value.""" + if distance not in cls.APPROVED_DISTANCES: raise ConfigError( - f"Invalid distance value in config file {config_yaml_path}. Must be one of: {approved_distances}" + f"Invalid distance value '{distance}' in {config_path}. Must be one of: {cls.APPROVED_DISTANCES}" ) - # Ensure blacklists are lists + @classmethod + def _validate_blacklists(cls, parameters: dict, config_path: Path): + """Ensure blacklists are lists.""" for blacklist in ["company_blacklist", "title_blacklist", "location_blacklist"]: if not isinstance(parameters.get(blacklist), list): raise ConfigError( - f"'{blacklist}' must be a list in config file {config_yaml_path}" + f"'{blacklist}' must be a list in {config_path}" ) if parameters[blacklist] is None: parameters[blacklist] = [] - return parameters - @staticmethod def validate_secrets(secrets_yaml_path: Path) -> str: - secrets = ConfigValidator.validate_yaml_file(secrets_yaml_path) + """Validate the secrets YAML file and retrieve the LLM API key.""" + secrets = ConfigValidator.load_yaml(secrets_yaml_path) mandatory_secrets = ["llm_api_key"] for secret in mandatory_secrets: if secret not in secrets: - raise ConfigError( - f"Missing secret '{secret}' in file {secrets_yaml_path}" - ) + raise ConfigError(f"Missing secret '{secret}' in {secrets_yaml_path}") + + if not secrets[secret]: + raise ConfigError(f"Secret '{secret}' cannot be empty in {secrets_yaml_path}") - if not secrets["llm_api_key"]: - raise ConfigError( - f"llm_api_key cannot be empty in secrets file {secrets_yaml_path}." - ) return secrets["llm_api_key"] class FileManager: + """Handles file system operations and validations.""" + + REQUIRED_FILES = [SECRETS_YAML, WORK_PREFERENCES_YAML, PLAIN_TEXT_RESUME_YAML] + @staticmethod - def validate_data_folder(app_data_folder: Path) -> tuple: - if not app_data_folder.exists() or not app_data_folder.is_dir(): + def validate_data_folder(app_data_folder: Path) -> Tuple[Path, Path, Path, Path]: + """Validate the existence of the data folder and required files.""" + if not app_data_folder.is_dir(): raise FileNotFoundError(f"Data folder not found: {app_data_folder}") - required_files = [SECRETS_YAML, WORK_PREFERENCES_YAML, PLAIN_TEXT_RESUME_YAML] - missing_files = [ - file for file in required_files if not (app_data_folder / file).exists() - ] - + missing_files = [file for file in FileManager.REQUIRED_FILES if not (app_data_folder / file).exists()] if missing_files: - raise FileNotFoundError( - f"Missing files in the data folder: {', '.join(missing_files)}" - ) + raise FileNotFoundError(f"Missing files in data folder: {', '.join(missing_files)}") output_folder = app_data_folder / "output" output_folder.mkdir(exist_ok=True) + return ( app_data_folder / SECRETS_YAML, app_data_folder / WORK_PREFERENCES_YAML, @@ -210,137 +207,169 @@ def validate_data_folder(app_data_folder: Path) -> tuple: ) @staticmethod - def file_paths_to_dict( - resume_file: Path | None, plain_text_resume_file: Path - ) -> dict: + def get_uploads(plain_text_resume_file: Path) -> Dict[str, Path]: + """Convert resume file paths to a dictionary.""" if not plain_text_resume_file.exists(): - raise FileNotFoundError( - f"Plain text resume file not found: {plain_text_resume_file}" - ) - - result = {"plainTextResume": plain_text_resume_file} + raise FileNotFoundError(f"Plain text resume file not found: {plain_text_resume_file}") - if resume_file: - if not resume_file.exists(): - raise FileNotFoundError(f"Resume file not found: {resume_file}") - result["resume"] = resume_file + uploads = {"plainTextResume": plain_text_resume_file} - return result + return uploads -def init_browser() -> webdriver.Chrome: +def create_cv(parameters: dict, llm_api_key: str): + """ + Logic to create a CV. + """ try: - options = chrome_browser_options() - service = ChromeService(ChromeDriverManager().install()) - return webdriver.Chrome(service=service, options=options) - except Exception as e: - raise RuntimeError(f"Failed to initialize browser: {str(e)}") - - -def init_uc_browser() -> webdriver.Chrome: - try: - options = uc.ChromeOptions() - # Add any additional options you need - options.add_argument( - "--blink-settings=imagesEnabled=false" - ) # Optional: disable images - return uc.Chrome(options=options) - except Exception as e: - raise RuntimeError(f"Failed to initialize browser: {str(e)}") + logger.info("Generating a CV based on provided parameters.") + # Load plain text resume + with open(parameters["uploads"]["plainTextResume"], "r", encoding="utf-8") as file: + plain_text_resume = file.read() -def create_and_run_bot(parameters, llm_api_key): - try: style_manager = StyleManager() + style_manager.choose_style() + resume_generator = ResumeGenerator() - with open( - parameters["uploads"]["plainTextResume"], "r", encoding="utf-8" - ) as file: - plain_text_resume = file.read() resume_object = Resume(plain_text_resume) - resume_generator_manager = FacadeManager( - llm_api_key, - style_manager, - resume_generator, - resume_object, - Path("data_folder/output"), + driver = init_browser() + resume_generator.set_resume_object(resume_object) + resume_facade = ResumeFacade( + api_key=llm_api_key, + style_manager=style_manager, + resume_generator=resume_generator, + resume_object=resume_object, + output_path=Path("data_folder/output"), ) + resume_facade.set_driver(driver) + result_base64 = resume_facade.create_cover_letter("Software engineer with Java experience") - # Run the resume generator manager's functions if resume is not provided - if "resume" not in parameters["uploads"]: - resume_generator_manager.choose_style() + # Decode Base64 to binary data + try: + pdf_data = base64.b64decode(result_base64) + except base64.binascii.Error as e: + logger.error("Error decoding Base64: %s", e) + raise - job_application_profile_object = JobApplicationProfile(plain_text_resume) + # Define the output path + output_path = Path(parameters["outputFileDirectory"]) / "resume.pdf" - browser = init_uc_browser() - job_portal = get_job_portal( - driver=browser, portal_name=LINKEDIN, parameters=parameters - ) - login_component = job_portal.authenticator - apply_component = AIHawkJobManager(job_portal) - gpt_answerer_component = GPTAnswerer(parameters, llm_api_key) - bot = AIHawkBotFacade(login_component, apply_component) - bot.set_job_application_profile_and_resume( - job_application_profile_object, resume_object - ) - bot.set_gpt_answerer_and_resume_generator( - gpt_answerer_component, resume_generator_manager - ) - bot.set_parameters(parameters) - bot.start_login() - if parameters["collectMode"] == True: - logger.info("Collecting") - bot.start_collect_data() + # Write binary data to the PDF file + try: + with open(output_path, "wb") as file: + file.write(pdf_data) + logger.info(f"CV saved to {output_path}") + except IOError as e: + logger.error("Error writing file: %s", e) + raise + except Exception as e: + logger.exception(f"An error occurred while creating the CV: {e}") + raise + + +def create_cover_letter(parameters: dict, llm_api_key: str): + """ + Logic to create a cover letter. + """ + # try: + # logger.info("Generating a cover letter based on provided parameters.") + # # Example implementation for generating the letter + # cover_letter_generator = CoverLetterGenerator(llm_api_key) + # result = cover_letter_generator.generate(parameters) + # output_path = Path(parameters["outputFileDirectory"]) / "cover_letter.docx" + # with open(output_path, "w", encoding="utf-8") as file: + # file.write(result) + # logger.info(f"Cover letter saved to {output_path}") + # except Exception as e: + # logger.exception(f"An error occurred while creating the cover letter: {e}") + # raise + pass + +def handle_inquiries(selected_actions: List[str], parameters: dict, llm_api_key: str): + """ + Decide which function to call based on the selected user actions. + + :param selected_actions: List of actions selected by the user. + :param parameters: Configuration parameters dictionary. + :param llm_api_key: API key for the language model. + """ + try: + if selected_actions: + if "Create Cover Letter" in selected_actions: + logger.info("Creating a cover letter...") + create_cover_letter(parameters, llm_api_key) + if "Create CV" in selected_actions: + logger.info("Creating a CV...") + create_cv(parameters, llm_api_key) else: - logger.info("Applying") - bot.start_apply() - except WebDriverException as e: - logger.error(f"WebDriver error occurred: {e}") + logger.warning("No actions selected. Nothing to execute.") except Exception as e: - raise RuntimeError(f"Error running the bot: {str(e)}") + logger.exception(f"An error occurred while handling inquiries: {e}") + raise +def prompt_user_action() -> str: + """ + Use inquirer to ask the user which action they want to perform. -@click.command() -@click.option( - "--resume", - type=click.Path(exists=True, file_okay=True, dir_okay=False, path_type=Path), - help="Path to the resume PDF file", -) -@click.option( - "--collect", - is_flag=True, - help="Only collects data job information into data.json file", -) -def main(collect: bool = False, resume: Optional[Path] = None): + :return: Selected action. + """ try: + questions = [ + inquirer.List( + 'action', + message="Select the action you want to perform:", + choices=[ + "Create Cover Letter", + "Create CV", + ], + ), + ] + answer = inquirer.prompt(questions) + if answer is None: + print("No answer provided. The user may have interrupted.") + return "" + return answer.get('action', "") + except Exception as e: + print(f"An error occurred: {e}") + return "" + + +def main(): + """Main entry point for the AIHawk Job Application Bot.""" + try: + # Define and validate the data folder data_folder = Path("data_folder") - secrets_file, config_file, plain_text_resume_file, output_folder = ( - FileManager.validate_data_folder(data_folder) - ) + secrets_file, config_file, plain_text_resume_file, output_folder = FileManager.validate_data_folder(data_folder) - parameters = ConfigValidator.validate_config(config_file) + # Validate configuration and secrets + config = ConfigValidator.validate_config(config_file) llm_api_key = ConfigValidator.validate_secrets(secrets_file) - parameters["uploads"] = FileManager.file_paths_to_dict( - resume, plain_text_resume_file - ) - parameters["outputFileDirectory"] = output_folder - parameters["collectMode"] = collect + # Prepare parameters + config["uploads"] = FileManager.get_uploads(plain_text_resume_file) + config["outputFileDirectory"] = output_folder + + # Interactive prompt for user to select actions + selected_actions = prompt_user_action() + + # Handle selected actions and execute them + handle_inquiries(selected_actions, config, llm_api_key) - create_and_run_bot(parameters, llm_api_key) except ConfigError as ce: - logger.error(f"Configuration error: {str(ce)}") + logger.error(f"Configuration error: {ce}") logger.error( - f"Refer to the configuration guide for troubleshooting: https://github.com/feder-cr/Auto_Jobs_Applier_AIHawk?tab=readme-ov-file#configuration {str(ce)}" + "Refer to the configuration guide for troubleshooting: " + "https://github.com/feder-cr/Auto_Jobs_Applier_AIHawk?tab=readme-ov-file#configuration" ) - except FileNotFoundError as fnf: - logger.error(f"File not found: {str(fnf)}") + logger.error(f"File not found: {fnf}") logger.error("Ensure all required files are present in the data folder.") except RuntimeError as re: - logger.error(f"Runtime error: {str(re)} {traceback.format_exc()}") + logger.error(f"Runtime error: {re}") + logger.debug(traceback.format_exc()) except Exception as e: - logger.error(f"An unexpected error occurred: {str(e)}") + logger.exception(f"An unexpected error occurred: {e}") if __name__ == "__main__": diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index b3e4f235d..000000000 --- a/pytest.ini +++ /dev/null @@ -1,6 +0,0 @@ -[pytest] -minversion = 6.0 -addopts = --strict-markers --tb=short --cov=src --cov-report=term-missing -testpaths = - tests -pythonpath = src \ No newline at end of file diff --git a/src/ai_hawk/authenticator.py b/src/ai_hawk/authenticator.py deleted file mode 100644 index a345c5d1b..000000000 --- a/src/ai_hawk/authenticator.py +++ /dev/null @@ -1,84 +0,0 @@ -import random -import time - -from abc import ABC, abstractmethod -from selenium.common.exceptions import NoSuchElementException, TimeoutException, NoAlertPresentException, TimeoutException, UnexpectedAlertPresentException -from selenium.webdriver.common.by import By -from selenium.webdriver.support import expected_conditions as EC -from selenium.webdriver.support.wait import WebDriverWait - -from src.logging import logger - -class AIHawkAuthenticator(ABC): - - @property - def home_url(self): - pass - - @abstractmethod - def navigate_to_login(self): - pass - - @property - def is_logged_in(self): - pass - - def __init__(self, driver): - self.driver = driver - logger.debug(f"AIHawkAuthenticator initialized with driver: {driver}") - - def start(self): - logger.info("Starting Chrome browser to log in to AIHawk.") - self.driver.get(self.home_url) - if self.is_logged_in: - logger.info("User is already logged in. Skipping login process.") - return - else: - logger.info("User is not logged in. Proceeding with login.") - self.handle_login() - - def handle_login(self): - try: - logger.info("Navigating to the AIHawk login page...") - self.navigate_to_login() - self.prompt_for_credentials() - except NoSuchElementException as e: - logger.error(f"Could not log in to AIHawk. Element not found: {e}") - self.handle_security_checks() - - - def prompt_for_credentials(self): - try: - logger.debug("Enter credentials...") - check_interval = 45 # Interval to log the current URL - elapsed_time = 0 - - while True: - # Bring the browser window to the front - current_window = self.driver.current_window_handle - self.driver.switch_to.window(current_window) - - # Log current URL every 4 seconds and remind the user to log in - current_url = self.driver.current_url - logger.info(f"Please login on {current_url}") - - # Check if the user is already on the feed page - if self.is_logged_in: - logger.debug("Login successful, redirected to feed page.") - break - else: - # Optionally wait for the password field (or any other element you expect on the login page) - WebDriverWait(self.driver, 60).until( - EC.presence_of_element_located((By.ID, "password")) - ) - logger.debug("Password field detected, waiting for login completion.") - - time.sleep(check_interval) - elapsed_time += check_interval - - except TimeoutException: - logger.error("Login form not found. Aborting login.") - - @abstractmethod - def handle_security_checks(self): - pass diff --git a/src/ai_hawk/bot_facade.py b/src/ai_hawk/bot_facade.py deleted file mode 100644 index f4b13d6b0..000000000 --- a/src/ai_hawk/bot_facade.py +++ /dev/null @@ -1,100 +0,0 @@ -from ai_hawk.job_manager import AIHawkJobManager -from src.logging import logger - - -class AIHawkBotState: - def __init__(self): - logger.debug("Initializing AIHawkBotState") - self.reset() - - def reset(self): - logger.debug("Resetting AIHawkBotState") - self.credentials_set = False - self.api_key_set = False - self.job_application_profile_set = False - self.gpt_answerer_set = False - self.parameters_set = False - self.logged_in = False - - def validate_state(self, required_keys): - logger.debug(f"Validating AIHawkBotState with required keys: {required_keys}") - for key in required_keys: - if not getattr(self, key): - logger.error(f"State validation failed: {key} is not set") - raise ValueError(f"{key.replace('_', ' ').capitalize()} must be set before proceeding.") - logger.debug("State validation passed") - - -class AIHawkBotFacade: - def __init__(self, login_component, apply_component): - logger.debug("Initializing AIHawkBotFacade") - self.login_component = login_component - self.apply_component : AIHawkJobManager = apply_component - self.state = AIHawkBotState() - self.job_application_profile = None - self.resume = None - self.email = None - self.password = None - self.parameters = None - - def set_job_application_profile_and_resume(self, job_application_profile, resume): - logger.debug("Setting job application profile and resume") - self._validate_non_empty(job_application_profile, "Job application profile") - self._validate_non_empty(resume, "Resume") - self.job_application_profile = job_application_profile - self.resume = resume - self.state.job_application_profile_set = True - logger.debug("Job application profile and resume set successfully") - - - def set_gpt_answerer_and_resume_generator(self, gpt_answerer_component, resume_generator_manager): - logger.debug("Setting GPT answerer and resume generator") - self._ensure_job_profile_and_resume_set() - gpt_answerer_component.set_job_application_profile(self.job_application_profile) - gpt_answerer_component.set_resume(self.resume) - self.apply_component.set_gpt_answerer(gpt_answerer_component) - self.apply_component.set_resume_generator_manager(resume_generator_manager) - self.state.gpt_answerer_set = True - logger.debug("GPT answerer and resume generator set successfully") - - def set_parameters(self, parameters): - logger.debug("Setting parameters") - self._validate_non_empty(parameters, "Parameters") - self.parameters = parameters - self.apply_component.set_parameters(parameters) - self.state.credentials_set = True - self.state.parameters_set = True - logger.debug("Parameters set successfully") - - def start_login(self): - logger.debug("Starting login process") - self.state.validate_state(['credentials_set']) - self.login_component.start() - self.state.logged_in = True - logger.debug("Login process completed successfully") - - def start_apply(self): - logger.debug("Starting apply process") - self.state.validate_state(['logged_in', 'job_application_profile_set', 'gpt_answerer_set', 'parameters_set']) - self.apply_component.start_applying() - logger.debug("Apply process started successfully") - - def start_collect_data(self): - logger.debug("Starting collecting data process") - self.state.validate_state(['logged_in', 'job_application_profile_set', 'gpt_answerer_set', 'parameters_set']) - self.apply_component.start_collecting_data() - logger.debug("Collecting data process started successfully") - - def _validate_non_empty(self, value, name): - logger.debug(f"Validating that {name} is not empty") - if not value: - logger.error(f"Validation failed: {name} is empty") - raise ValueError(f"{name} cannot be empty.") - logger.debug(f"Validation passed for {name}") - - def _ensure_job_profile_and_resume_set(self): - logger.debug("Ensuring job profile and resume are set") - if not self.state.job_application_profile_set: - logger.error("Job application profile and resume are not set") - raise ValueError("Job application profile and resume must be set before proceeding.") - logger.debug("Job profile and resume are set") diff --git a/src/ai_hawk/job_applier.py b/src/ai_hawk/job_applier.py deleted file mode 100644 index f2f30644e..000000000 --- a/src/ai_hawk/job_applier.py +++ /dev/null @@ -1,714 +0,0 @@ -import base64 -from calendar import c -import json -from math import log -from operator import is_ -import os -import random -import re -import time -import traceback -from typing import List, Optional, Any, Text, Tuple - -from httpx import HTTPStatusError -from regex import W -from reportlab.lib.pagesizes import A4 -from reportlab.pdfgen import canvas -from reportlab.pdfbase.pdfmetrics import stringWidth - -from selenium.webdriver.remote.webelement import WebElement -from selenium.webdriver.support import expected_conditions as EC - -from jobContext import JobContext -from job_application import JobApplication -from job_application_saver import ApplicationSaver -from job_portals.application_form_elements import SelectQuestion, TextBoxQuestionType -from job_portals.base_job_portal import BaseJobPage, BaseJobPortal - -from src.logging import logger -from src.job import Job -from src.ai_hawk.llm.llm_manager import GPTAnswerer - - -def question_already_exists_in_data(question: str, data: List[dict]) -> bool: - """ - Check if a question already exists in the data list. - - Args: - question: The question text to search for - data: List of question dictionaries to search through - - Returns: - bool: True if question exists, False otherwise - """ - return any(item["question"] == question for item in data) - - -class AIHawkJobApplier: - def __init__( - self, - job_portal: BaseJobPortal, - resume_dir: Optional[str], - set_old_answers: List[Tuple[str, str, str]], - gpt_answerer: GPTAnswerer, - resume_generator_manager, - ): - logger.debug("Initializing AIHawkEasyApplier") - if resume_dir is None or not os.path.exists(resume_dir): - resume_dir = None - self.job_page = job_portal.job_page - self.job_application_page = job_portal.application_page - self.resume_path = resume_dir - self.set_old_answers = set_old_answers - self.gpt_answerer = gpt_answerer - self.resume_generator_manager = resume_generator_manager - self.all_data = self._load_questions_from_json() - self.current_job : Job | None = None - - logger.debug("AIHawkEasyApplier initialized successfully") - - def _load_questions_from_json(self) -> List[dict]: - output_file = "answers.json" - logger.debug(f"Loading questions from JSON file: {output_file}") - try: - with open(output_file, "r") as f: - try: - data = json.load(f) - if not isinstance(data, list): - raise ValueError( - "JSON file format is incorrect. Expected a list of questions." - ) - except json.JSONDecodeError: - logger.error("JSON decoding failed") - data = [] - logger.debug("Questions loaded successfully from JSON") - return data - except FileNotFoundError: - logger.warning("JSON file not found, returning empty list") - return [] - except Exception: - tb_str = traceback.format_exc() - logger.error(f"Error loading questions data from JSON file: {tb_str}") - raise Exception( - f"Error loading questions data from JSON file: \nTraceback:\n{tb_str}" - ) - - def apply_to_job(self, job: Job) -> None: - """ - Starts the process of applying to a job. - :param job: A job object with the job details. - :return: None - """ - logger.debug(f"Applying to job: {job}") - try: - self.job_apply(job) - logger.info(f"Successfully applied to job: {job.title}") - except Exception as e: - logger.error(f"Failed to apply to job: {job.title}, error: {str(e)}") - raise e - - def job_apply(self, job: Job): - logger.debug(f"Starting job application for job: {job}") - job_context = JobContext() - job_context.job = job - job_context.job_application = JobApplication(job) - self.job_page.goto_job_page(job) - - try: - - job_description = self.job_page.get_job_description(job) - logger.debug(f"Job description set: {job_description[:100]}") - - job.set_job_description(job_description) - - recruiter_link = self.job_page.get_recruiter_link() - job.set_recruiter_link(recruiter_link) - - self.current_job = job - - logger.debug("Passing job information to GPT Answerer") - self.gpt_answerer.set_job(job) - - # Todo: add this job to skip list with it's reason - if not self.gpt_answerer.is_job_suitable(): - return - - self.job_page.click_apply_button(job_context) - - logger.debug("Filling out application form") - self._fill_application_form(job_context) - logger.debug( - f"Job application process completed successfully for job: {job}" - ) - - except Exception as e: - - tb_str = traceback.format_exc() - logger.error(f"Failed to apply to job: {job}, error: {tb_str}") - - logger.debug("Saving application process due to failure") - self.job_application_page.save() - - raise Exception( - f"Failed to apply to job! Original exception:\nTraceback:\n{tb_str}" - ) - - def _fill_application_form(self, job_context: JobContext): - job = job_context.job - job_application = job_context.job_application - logger.debug(f"Filling out application form for job: {job}") - - self.fill_up(job_context) - - while self.job_application_page.has_next_button(): - self.fill_up(job_context) - self.job_application_page.click_next_button() - self.job_application_page.handle_errors() - - if self.job_application_page.has_submit_button(): - self.job_application_page.click_submit_button() - ApplicationSaver.save(job_application) - logger.debug("Application form submitted") - return - - logger.warning(f"submit button not found, discarding application {job}") - - def fill_up(self, job_context: JobContext) -> None: - job = job_context.job - logger.debug(f"Filling up form sections for job: {job}") - - input_elements = self.job_application_page.get_input_elements() - - try: - for element in input_elements: - self._process_form_element(element, job_context) - - except Exception as e: - logger.error( - f"Failed to fill up form sections: {e} {traceback.format_exc()}" - ) - - def _process_form_element( - self, element: WebElement, job_context: JobContext - ) -> None: - logger.debug(f"Processing form element {element}") - if self.job_application_page.is_upload_field(element): - self._handle_upload_fields(element, job_context) - else: - self._fill_additional_questions(job_context) - - def _handle_upload_fields( - self, element: WebElement, job_context: JobContext - ) -> None: - logger.debug("Handling upload fields") - - file_upload_elements = self.job_application_page.get_file_upload_elements() - - for element in file_upload_elements: - - file_upload_element_heading = ( - self.job_application_page.get_upload_element_heading(element) - ) - - output = self.gpt_answerer.determine_resume_or_cover( - file_upload_element_heading - ) - - if "resume" in output: - logger.debug("Uploading resume") - if self.resume_path is not None and os.path.isfile(self.resume_path): - resume_file_path = os.path.abspath(self.resume_path) - self.job_application_page.upload_file(element, resume_file_path) - job_context.job.resume_path = resume_file_path - job_context.job_application.resume_path = str(resume_file_path) - logger.debug(f"Resume uploaded from path: {resume_file_path}") - else: - logger.debug( - "Resume path not found or invalid, generating new resume" - ) - self._create_and_upload_resume(element, job_context) - - elif "cover" in output: - logger.debug("Uploading cover letter") - self._create_and_upload_cover_letter(element, job_context) - - logger.debug("Finished handling upload fields") - - def _create_and_upload_resume(self, element, job_context: JobContext): - job = job_context.job - job_application = job_context.job_application - logger.debug("Starting the process of creating and uploading resume.") - folder_path = "generated_cv" - - try: - if not os.path.exists(folder_path): - logger.debug(f"Creating directory at path: {folder_path}") - os.makedirs(folder_path, exist_ok=True) - except Exception as e: - logger.error(f"Failed to create directory: {folder_path}. Error: {e}") - raise - - while True: - try: - timestamp = int(time.time()) - file_path_pdf = os.path.join(folder_path, f"CV_{timestamp}.pdf") - logger.debug(f"Generated file path for resume: {file_path_pdf}") - - logger.debug(f"Generating resume for job: {job.title} at {job.company}") - resume_pdf_base64 = self.resume_generator_manager.pdf_base64( - job_description_text=job.description - ) - with open(file_path_pdf, "xb") as f: - f.write(base64.b64decode(resume_pdf_base64)) - logger.debug( - f"Resume successfully generated and saved to: {file_path_pdf}" - ) - - break - except HTTPStatusError as e: - if e.response.status_code == 429: - - retry_after = e.response.headers.get("retry-after") - retry_after_ms = e.response.headers.get("retry-after-ms") - - if retry_after: - wait_time = int(retry_after) - logger.warning( - f"Rate limit exceeded, waiting {wait_time} seconds before retrying..." - ) - elif retry_after_ms: - wait_time = int(retry_after_ms) / 1000.0 - logger.warning( - f"Rate limit exceeded, waiting {wait_time} milliseconds before retrying..." - ) - else: - wait_time = 20 - logger.warning( - f"Rate limit exceeded, waiting {wait_time} seconds before retrying..." - ) - - time.sleep(wait_time) - else: - logger.error(f"HTTP error: {e}") - raise - - except Exception as e: - logger.error(f"Failed to generate resume: {e}") - tb_str = traceback.format_exc() - logger.error(f"Traceback: {tb_str}") - if "RateLimitError" in str(e): - logger.warning("Rate limit error encountered, retrying...") - time.sleep(20) - else: - raise - - file_size = os.path.getsize(file_path_pdf) - max_file_size = 2 * 1024 * 1024 # 2 MB - logger.debug(f"Resume file size: {file_size} bytes") - if file_size > max_file_size: - logger.error(f"Resume file size exceeds 2 MB: {file_size} bytes") - raise ValueError("Resume file size exceeds the maximum limit of 2 MB.") - - allowed_extensions = {".pdf", ".doc", ".docx"} - file_extension = os.path.splitext(file_path_pdf)[1].lower() - logger.debug(f"Resume file extension: {file_extension}") - if file_extension not in allowed_extensions: - logger.error(f"Invalid resume file format: {file_extension}") - raise ValueError( - "Resume file format is not allowed. Only PDF, DOC, and DOCX formats are supported." - ) - - try: - logger.debug(f"Uploading resume from path: {file_path_pdf}") - element.send_keys(os.path.abspath(file_path_pdf)) - job.resume_path = os.path.abspath(file_path_pdf) - job_application.resume_path = os.path.abspath(file_path_pdf) - time.sleep(2) - logger.debug(f"Resume created and uploaded successfully: {file_path_pdf}") - except Exception as e: - tb_str = traceback.format_exc() - logger.error(f"Resume upload failed: {tb_str}") - raise Exception(f"Upload failed: \nTraceback:\n{tb_str}") - - def _create_and_upload_cover_letter( - self, element: WebElement, job_context: JobContext - ) -> None: - job = job_context.job - logger.debug("Starting the process of creating and uploading cover letter.") - - cover_letter_text = self.gpt_answerer.answer_question_textual_wide_range( - "Write a cover letter" - ) - - folder_path = "generated_cv" - - try: - - if not os.path.exists(folder_path): - logger.debug(f"Creating directory at path: {folder_path}") - os.makedirs(folder_path, exist_ok=True) - except Exception as e: - logger.error(f"Failed to create directory: {folder_path}. Error: {e}") - raise - - while True: - try: - timestamp = int(time.time()) - file_path_pdf = os.path.join( - folder_path, f"Cover_Letter_{timestamp}.pdf" - ) - logger.debug(f"Generated file path for cover letter: {file_path_pdf}") - - c = canvas.Canvas(file_path_pdf, pagesize=A4) - page_width, page_height = A4 - text_object = c.beginText(50, page_height - 50) - text_object.setFont("Helvetica", 12) - - max_width = page_width - 100 - bottom_margin = 50 - available_height = page_height - bottom_margin - 50 - - def split_text_by_width(text, font, font_size, max_width): - wrapped_lines = [] - for line in text.splitlines(): - - if stringWidth(line, font, font_size) > max_width: - words = line.split() - new_line = "" - for word in words: - if ( - stringWidth(new_line + word + " ", font, font_size) - <= max_width - ): - new_line += word + " " - else: - wrapped_lines.append(new_line.strip()) - new_line = word + " " - wrapped_lines.append(new_line.strip()) - else: - wrapped_lines.append(line) - return wrapped_lines - - lines = split_text_by_width( - cover_letter_text, "Helvetica", 12, max_width - ) - - for line in lines: - text_height = text_object.getY() - if text_height > bottom_margin: - text_object.textLine(line) - else: - - c.drawText(text_object) - c.showPage() - text_object = c.beginText(50, page_height - 50) - text_object.setFont("Helvetica", 12) - text_object.textLine(line) - - c.drawText(text_object) - c.save() - logger.debug( - f"Cover letter successfully generated and saved to: {file_path_pdf}" - ) - - break - except Exception as e: - logger.error(f"Failed to generate cover letter: {e}") - tb_str = traceback.format_exc() - logger.error(f"Traceback: {tb_str}") - raise - - file_size = os.path.getsize(file_path_pdf) - max_file_size = 2 * 1024 * 1024 # 2 MB - logger.debug(f"Cover letter file size: {file_size} bytes") - if file_size > max_file_size: - logger.error(f"Cover letter file size exceeds 2 MB: {file_size} bytes") - raise ValueError( - "Cover letter file size exceeds the maximum limit of 2 MB." - ) - - allowed_extensions = {".pdf", ".doc", ".docx"} - file_extension = os.path.splitext(file_path_pdf)[1].lower() - logger.debug(f"Cover letter file extension: {file_extension}") - if file_extension not in allowed_extensions: - logger.error(f"Invalid cover letter file format: {file_extension}") - raise ValueError( - "Cover letter file format is not allowed. Only PDF, DOC, and DOCX formats are supported." - ) - - try: - - logger.debug(f"Uploading cover letter from path: {file_path_pdf}") - element.send_keys(os.path.abspath(file_path_pdf)) - job.cover_letter_path = os.path.abspath(file_path_pdf) - job_context.job_application.cover_letter_path = os.path.abspath( - file_path_pdf - ) - time.sleep(2) - logger.debug( - f"Cover letter created and uploaded successfully: {file_path_pdf}" - ) - except Exception as e: - tb_str = traceback.format_exc() - logger.error(f"Cover letter upload failed: {tb_str}") - raise Exception(f"Upload failed: \nTraceback:\n{tb_str}") - - def _fill_additional_questions(self, job_context: JobContext) -> None: - logger.debug("Filling additional questions") - form_sections = self.job_application_page.get_form_sections() - for section in form_sections: - self._process_form_section(job_context, section) - - def _process_form_section( - self, job_context: JobContext, section: WebElement - ) -> None: - logger.debug("Processing form section") - if self.job_application_page.is_terms_of_service(section): - logger.debug("Handled terms of service") - self.job_application_page.accept_terms_of_service(section) - return - - if self.job_application_page.is_radio_question(section): - radio_question = self.job_application_page.web_element_to_radio_question( - section - ) - self._handle_radio_question(job_context, radio_question, section) - logger.debug("Handled radio button") - return - - if self.job_application_page.is_textbox_question(section): - self._handle_textbox_question(job_context, section) - logger.debug("Handled textbox question") - return - - if self.job_application_page.is_dropdown_question(section): - self._handle_dropdown_question(job_context, section) - logger.debug("Handled dropdown question") - return - - def _handle_radio_question( - self, - job_context: JobContext, - radio_question: SelectQuestion, - section: WebElement, - ) -> None: - job_application = job_context.job_application - - question_text = radio_question.question - options = radio_question.options - - existing_answer = None - current_question_sanitized = self._sanitize_text(question_text) - for item in self.all_data: - if ( - current_question_sanitized in item["question"] - and item["type"] == "radio" - ): - existing_answer = item - break - - if existing_answer: - self.job_application_page.select_radio_option( - section, existing_answer["answer"] - ) - job_application.save_application_data(existing_answer) - logger.debug("Selected existing radio answer") - return - - answer = self.gpt_answerer.answer_question_from_options(question_text, options) - self._save_questions_to_json( - {"type": "radio", "question": question_text, "answer": answer} - ) - self.all_data = self._load_questions_from_json() - job_application.save_application_data( - {"type": "radio", "question": question_text, "answer": answer} - ) - self.job_application_page.select_radio_option(section, answer) - logger.debug("Selected new radio answer") - return - - def _handle_textbox_question( - self, job_context: JobContext, section: WebElement - ) -> None: - - textbox_question = self.job_application_page.web_element_to_textbox_question( - section - ) - - question_text = textbox_question.question - question_type = textbox_question.type.value - is_cover_letter = "cover letter" in question_text.lower() - is_numeric = textbox_question.type is TextBoxQuestionType.NUMERIC - - # Look for existing answer if it's not a cover letter field - existing_answer = None - if not is_cover_letter: - current_question_sanitized = self._sanitize_text(question_text) - for item in self.all_data: - if ( - item["question"] == current_question_sanitized - and item.get("type") == question_type - ): - existing_answer = item["answer"] - logger.debug(f"Found existing answer: {existing_answer}") - break - - if existing_answer and not is_cover_letter: - answer = existing_answer - logger.debug(f"Using existing answer: {answer}") - else: - if is_numeric: - answer = self.gpt_answerer.answer_question_numeric(question_text) - logger.debug(f"Generated numeric answer: {answer}") - else: - answer = self.gpt_answerer.answer_question_textual_wide_range( - question_text - ) - logger.debug(f"Generated textual answer: {answer}") - - # Save non-cover letter answers - if not is_cover_letter and not existing_answer: - self._save_questions_to_json( - {"type": question_type, "question": question_text, "answer": answer} - ) - self.all_data = self._load_questions_from_json() - logger.debug("Saved non-cover letter answer to JSON.") - - self.job_application_page.fill_textbox_question(section, answer) - logger.debug("Entered answer into the textbox.") - - job_context.job_application.save_application_data( - {"type": question_type, "question": question_text, "answer": answer} - ) - - return - - def _handle_dropdown_question( - self, job_context: JobContext, section: WebElement - ) -> None: - job_application = job_context.job_application - - dropdown = self.job_application_page.web_element_to_dropdown_question(section) - - question_text = dropdown.question - existing_answer = None - current_question_sanitized = self._sanitize_text(question_text) - options = dropdown.options - - for item in self.all_data: - if ( - current_question_sanitized in item["question"] - and item["type"] == "dropdown" - ): - existing_answer = item["answer"] - break - - if existing_answer: - logger.debug( - f"Found existing answer for question '{question_text}': {existing_answer}" - ) - job_application.save_application_data( - { - "type": "dropdown", - "question": question_text, - "answer": existing_answer, - } - ) - - answer = existing_answer - - else: - logger.debug( - f"No existing answer found, querying model for: {question_text}" - ) - answer = self.gpt_answerer.answer_question_from_options( - question_text, options - ) - self._save_questions_to_json( - { - "type": "dropdown", - "question": question_text, - "answer": answer, - } - ) - self.all_data = self._load_questions_from_json() - job_application.save_application_data( - { - "type": "dropdown", - "question": question_text, - "answer": answer, - } - ) - - self.job_application_page.select_dropdown_option(section, answer) - logger.debug(f"Selected new dropdown answer: {answer}") - return - - def _save_questions_to_json(self, question_data: dict) -> None: - output_file = "answers.json" - question_data["question"] = self._sanitize_text(question_data["question"]) - - logger.debug(f"Checking if question data already exists: {question_data}") - try: - with open(output_file, "r+") as f: - try: - data = json.load(f) - if not isinstance(data, list): - raise ValueError( - "JSON file format is incorrect. Expected a list of questions." - ) - except json.JSONDecodeError: - logger.error("JSON decoding failed") - data = [] - - should_be_saved: bool = not question_already_exists_in_data( - question_data["question"], data - ) and not self.answer_contians_company_name(question_data["answer"]) - - if should_be_saved: - logger.debug("New question found, appending to JSON") - data.append(question_data) - f.seek(0) - json.dump(data, f, indent=4) - f.truncate() - logger.debug("Question data saved successfully to JSON") - else: - logger.debug("Question already exists, skipping save") - except FileNotFoundError: - logger.warning("JSON file not found, creating new file") - with open(output_file, "w") as f: - json.dump([question_data], f, indent=4) - logger.debug("Question data saved successfully to new JSON file") - except Exception: - tb_str = traceback.format_exc() - logger.error(f"Error saving questions data to JSON file: {tb_str}") - raise Exception( - f"Error saving questions data to JSON file: \nTraceback:\n{tb_str}" - ) - - def _sanitize_text(self, text: str) -> str: - sanitized_text = text.lower().strip().replace('"', "").replace("\\", "") - sanitized_text = ( - re.sub(r"[\x00-\x1F\x7F]", "", sanitized_text) - .replace("\n", " ") - .replace("\r", "") - .rstrip(",") - ) - logger.debug(f"Sanitized text: {sanitized_text}") - return sanitized_text - - def _find_existing_answer(self, question_text): - for item in self.all_data: - if self._sanitize_text(item["question"]) == self._sanitize_text( - question_text - ): - return item - return None - - def answer_contians_company_name(self, answer: Any) -> bool: - return ( - isinstance(answer, str) - and self.current_job is not None - and self.current_job.company is not None - and self.current_job.company in answer - ) diff --git a/src/ai_hawk/job_manager.py b/src/ai_hawk/job_manager.py deleted file mode 100644 index d705cc59e..000000000 --- a/src/ai_hawk/job_manager.py +++ /dev/null @@ -1,430 +0,0 @@ -import json -import os -import random -import time -from itertools import product -from pathlib import Path -import traceback - -from inputimeout import inputimeout, TimeoutOccurred - -from ai_hawk.job_applier import AIHawkJobApplier -from config import JOB_MAX_APPLICATIONS, JOB_MIN_APPLICATIONS, MINIMUM_WAIT_TIME_IN_SECONDS - -from job_portals.base_job_portal import BaseJobPortal, get_job_portal -from src.job import Job -from src.logging import logger - -from src.regex_utils import look_ahead_patterns -import re - -import utils.browser_utils as browser_utils -import utils.time_utils - - -class EnvironmentKeys: - def __init__(self): - logger.debug("Initializing EnvironmentKeys") - self.skip_apply = self._read_env_key_bool("SKIP_APPLY") - self.disable_description_filter = self._read_env_key_bool("DISABLE_DESCRIPTION_FILTER") - logger.debug(f"EnvironmentKeys initialized: skip_apply={self.skip_apply}, disable_description_filter={self.disable_description_filter}") - - @staticmethod - def _read_env_key(key: str) -> str: - value = os.getenv(key, "") - logger.debug(f"Read environment key {key}: {value}") - return value - - @staticmethod - def _read_env_key_bool(key: str) -> bool: - value = os.getenv(key) == "True" - logger.debug(f"Read environment key {key} as bool: {value}") - return value - - -class AIHawkJobManager: - def __init__(self, job_portal : BaseJobPortal): - logger.debug("Initializing AIHawkJobManager") - self.job_portal = job_portal - self.set_old_answers = set() - self.easy_applier_component = None - logger.debug("AIHawkJobManager initialized successfully") - - def set_parameters(self, parameters): - logger.debug("Setting parameters for AIHawkJobManager") - self.company_blacklist = parameters.get('company_blacklist', []) or [] - self.title_blacklist = parameters.get('title_blacklist', []) or [] - self.location_blacklist = parameters.get('location_blacklist', []) or [] - self.positions = parameters.get('positions', []) - self.locations = parameters.get('locations', []) - self.apply_once_at_company = parameters.get('apply_once_at_company', False) - self.seen_jobs = [] - - self.min_applicants = JOB_MIN_APPLICATIONS - self.max_applicants = JOB_MAX_APPLICATIONS - - # Generate regex patterns from blacklist lists - self.title_blacklist_patterns = look_ahead_patterns(self.title_blacklist) - self.company_blacklist_patterns = look_ahead_patterns(self.company_blacklist) - self.location_blacklist_patterns = look_ahead_patterns(self.location_blacklist) - - resume_path = parameters.get('uploads', {}).get('resume', None) - self.resume_path = Path(resume_path) if resume_path and Path(resume_path).exists() else None - self.output_file_directory = Path(parameters['outputFileDirectory']) - self.env_config = EnvironmentKeys() - logger.debug("Parameters set successfully") - - def set_gpt_answerer(self, gpt_answerer): - logger.debug("Setting GPT answerer") - self.gpt_answerer = gpt_answerer - - def set_resume_generator_manager(self, resume_generator_manager): - logger.debug("Setting resume generator manager") - self.resume_generator_manager = resume_generator_manager - - def start_collecting_data(self): - searches = list(product(self.positions, self.locations)) - random.shuffle(searches) - page_sleep = 0 - minimum_time = 60 * 5 - minimum_page_time = time.time() + minimum_time - - for position, location in searches: - location_url = "&location=" + location - job_page_number = -1 - logger.info(f"Collecting data for {position} in {location}.",color="yellow") - try: - while True: - page_sleep += 1 - job_page_number += 1 - logger.info(f"Going to job page {job_page_number}", color="yellow") - self.job_portal.jobs_page.next_job_page(position, location_url, job_page_number) - utils.time_utils.medium_sleep() - logger.info("Starting the collecting process for this page", color="yellow") - self.read_jobs() - logger.info("Collecting data on this page has been completed!", color="yellow") - - time_left = minimum_page_time - time.time() - if time_left > 0: - logger.info(f"Sleeping for {time_left} seconds.",color="yellow") - time.sleep(time_left) - minimum_page_time = time.time() + minimum_time - if page_sleep % 5 == 0: - sleep_time = random.randint(1, 5) - logger.info(f"Sleeping for {sleep_time / 60} minutes.",color="yellow") - time.sleep(sleep_time) - page_sleep += 1 - except Exception: - pass - time_left = minimum_page_time - time.time() - if time_left > 0: - logger.info(f"Sleeping for {time_left} seconds.",color="yellow") - time.sleep(time_left) - minimum_page_time = time.time() + minimum_time - if page_sleep % 5 == 0: - sleep_time = random.randint(50, 90) - logger.info(f"Sleeping for {sleep_time / 60} minutes.",color="yellow") - time.sleep(sleep_time) - page_sleep += 1 - - def start_applying(self): - logger.debug("Starting job application process") - self.easy_applier_component = AIHawkJobApplier(self.job_portal, self.resume_path, self.set_old_answers, - self.gpt_answerer, self.resume_generator_manager) - searches = list(product(self.positions, self.locations)) - random.shuffle(searches) - page_sleep = 0 - minimum_time = MINIMUM_WAIT_TIME_IN_SECONDS - minimum_page_time = time.time() + minimum_time - - for position, location in searches: - location_url = "&location=" + location - job_page_number = -1 - logger.debug(f"Starting the search for {position} in {location}.") - - try: - while True: - page_sleep += 1 - job_page_number += 1 - logger.debug(f"Going to job page {job_page_number}") - self.job_portal.jobs_page.next_job_page(position, location_url, job_page_number) - utils.time_utils.medium_sleep() - logger.debug("Starting the application process for this page...") - - try: - jobs = self.job_portal.jobs_page.get_jobs_from_page(scroll=True) - if not jobs: - logger.debug("No more jobs found on this page. Exiting loop.") - break - except Exception as e: - logger.error(f"Failed to retrieve jobs: {e}") - break - - try: - self.apply_jobs() - except Exception as e: - logger.error(f"Error during job application: {e} {traceback.format_exc()}") - continue - - logger.debug("Applying to jobs on this page has been completed!") - - time_left = minimum_page_time - time.time() - - # Ask user if they want to skip waiting, with timeout - if time_left > 0: - try: - user_input = inputimeout( - prompt=f"Sleeping for {time_left} seconds. Press 'y' to skip waiting. Timeout 60 seconds : ", - timeout=60).strip().lower() - except TimeoutOccurred: - user_input = '' # No input after timeout - if user_input == 'y': - logger.debug("User chose to skip waiting.") - else: - logger.debug(f"Sleeping for {time_left} seconds as user chose not to skip.") - time.sleep(time_left) - - minimum_page_time = time.time() + minimum_time - - if page_sleep % 5 == 0: - sleep_time = random.randint(5, 34) - try: - user_input = inputimeout( - prompt=f"Sleeping for {sleep_time / 60} minutes. Press 'y' to skip waiting. Timeout 60 seconds : ", - timeout=60).strip().lower() - except TimeoutOccurred: - user_input = '' # No input after timeout - if user_input == 'y': - logger.debug("User chose to skip waiting.") - else: - logger.debug(f"Sleeping for {sleep_time} seconds.") - time.sleep(sleep_time) - page_sleep += 1 - except Exception as e: - logger.error(f"Unexpected error during job search: {e}") - continue - - time_left = minimum_page_time - time.time() - - if time_left > 0: - try: - user_input = inputimeout( - prompt=f"Sleeping for {time_left} seconds. Press 'y' to skip waiting. Timeout 60 seconds : ", - timeout=60).strip().lower() - except TimeoutOccurred: - user_input = '' # No input after timeout - if user_input == 'y': - logger.debug("User chose to skip waiting.") - else: - logger.debug(f"Sleeping for {time_left} seconds as user chose not to skip.") - time.sleep(time_left) - - minimum_page_time = time.time() + minimum_time - - if page_sleep % 5 == 0: - sleep_time = random.randint(50, 90) - try: - user_input = inputimeout( - prompt=f"Sleeping for {sleep_time / 60} minutes. Press 'y' to skip waiting: ", - timeout=60).strip().lower() - except TimeoutOccurred: - user_input = '' # No input after timeout - if user_input == 'y': - logger.debug("User chose to skip waiting.") - else: - logger.debug(f"Sleeping for {sleep_time} seconds.") - time.sleep(sleep_time) - page_sleep += 1 - - def read_jobs(self): - - job_element_list = self.job_portal.jobs_page.get_jobs_from_page() - job_list = [self.job_portal.jobs_page.job_tile_to_job(job_element) for job_element in job_element_list] - for job in job_list: - if self.is_blacklisted(job.title, job.company, job.link, job.location): - logger.info(f"Blacklisted {job.title} at {job.company} in {job.location}, skipping...") - self.write_to_file(job, "skipped") - continue - try: - self.write_to_file(job,'data') - except Exception as e: - self.write_to_file(job, "failed") - continue - - def apply_jobs(self): - job_element_list = self.job_portal.jobs_page.get_jobs_from_page() - - job_list = [self.job_portal.jobs_page.job_tile_to_job(job_element) for job_element in job_element_list] - - for job in job_list: - - logger.debug(f"Starting applicant for job: {job.title} at {job.company}") - #TODO fix apply threshold - """ - # Initialize applicants_count as None - applicants_count = None - - # Iterate over each job insight element to find the one containing the word "applicant" - for element in job_insight_elements: - logger.debug(f"Checking element text: {element.text}") - if "applicant" in element.text.lower(): - # Found an element containing "applicant" - applicants_text = element.text.strip() - logger.debug(f"Applicants text found: {applicants_text}") - - # Extract numeric digits from the text (e.g., "70 applicants" -> "70") - applicants_count = ''.join(filter(str.isdigit, applicants_text)) - logger.debug(f"Extracted applicants count: {applicants_count}") - - if applicants_count: - if "over" in applicants_text.lower(): - applicants_count = int(applicants_count) + 1 # Handle "over X applicants" - logger.debug(f"Applicants count adjusted for 'over': {applicants_count}") - else: - applicants_count = int(applicants_count) # Convert the extracted number to an integer - break - - # Check if applicants_count is valid (not None) before performing comparisons - if applicants_count is not None: - # Perform the threshold check for applicants count - if applicants_count < self.min_applicants or applicants_count > self.max_applicants: - logger.debug(f"Skipping {job.title} at {job.company}, applicants count: {applicants_count}") - self.write_to_file(job, "skipped_due_to_applicants") - continue # Skip this job if applicants count is outside the threshold - else: - logger.debug(f"Applicants count {applicants_count} is within the threshold") - else: - # If no applicants count was found, log a warning but continue the process - logger.warning( - f"Applicants count not found for {job.title} at {job.company}, continuing with application.") - except NoSuchElementException: - # Log a warning if the job insight elements are not found, but do not stop the job application process - logger.warning( - f"Applicants count elements not found for {job.title} at {job.company}, continuing with application.") - except ValueError as e: - # Handle errors when parsing the applicants count - logger.error(f"Error parsing applicants count for {job.title} at {job.company}: {e}") - except Exception as e: - # Catch any other exceptions to ensure the process continues - logger.error( - f"Unexpected error during applicants count processing for {job.title} at {job.company}: {e}") - - # Continue with the job application process regardless of the applicants count check - """ - - - if self.is_previously_failed_to_apply(job.link): - logger.debug(f"Previously failed to apply for {job.title} at {job.company}, skipping...") - continue - if self.is_blacklisted(job.title, job.company, job.link, job.location): - logger.debug(f"Job blacklisted: {job.title} at {job.company} in {job.location}") - self.write_to_file(job, "skipped", "Job blacklisted") - continue - if self.is_already_applied_to_job(job.title, job.company, job.link): - self.write_to_file(job, "skipped", "Already applied to this job") - continue - if self.is_already_applied_to_company(job.company): - self.write_to_file(job, "skipped", "Already applied to this company") - continue - try: - if job.apply_method not in {"Continue", "Applied", "Apply"}: - self.easy_applier_component.job_apply(job) - self.write_to_file(job, "success") - logger.debug(f"Applied to job: {job.title} at {job.company}") - except Exception as e: - logger.error(f"Failed to apply for {job.title} at {job.company}: {e}",exc_info=True) - self.write_to_file(job, "failed", f"Application error: {str(e)}") - continue - - def write_to_file(self, job : Job, file_name, reason=None): - logger.debug(f"Writing job application result to file: {file_name}") - pdf_path = Path(job.resume_path).resolve() - pdf_path = pdf_path.as_uri() - data = { - "company": job.company, - "job_title": job.title, - "link": job.link, - "job_recruiter": job.recruiter_link, - "job_location": job.location, - "pdf_path": pdf_path - } - - if reason: - data["reason"] = reason - - file_path = self.output_file_directory / f"{file_name}.json" - if not file_path.exists(): - with open(file_path, 'w', encoding='utf-8') as f: - json.dump([data], f, indent=4) - logger.debug(f"Job data written to new file: {file_name}") - else: - with open(file_path, 'r+', encoding='utf-8') as f: - try: - existing_data = json.load(f) - except json.JSONDecodeError: - logger.error(f"JSON decode error in file: {file_path}") - existing_data = [] - existing_data.append(data) - f.seek(0) - json.dump(existing_data, f, indent=4) - f.truncate() - logger.debug(f"Job data appended to existing file: {file_name}") - - def is_blacklisted(self, job_title, company, link, job_location): - logger.debug(f"Checking if job is blacklisted: {job_title} at {company} in {job_location}") - title_blacklisted = any(re.search(pattern, job_title, re.IGNORECASE) for pattern in self.title_blacklist_patterns) - company_blacklisted = any(re.search(pattern, company, re.IGNORECASE) for pattern in self.company_blacklist_patterns) - location_blacklisted = any(re.search(pattern, job_location, re.IGNORECASE) for pattern in self.location_blacklist_patterns) - link_seen = link in self.seen_jobs - is_blacklisted = title_blacklisted or company_blacklisted or location_blacklisted or link_seen - logger.debug(f"Job blacklisted status: {is_blacklisted}") - - return is_blacklisted - - def is_already_applied_to_job(self, job_title, company, link): - link_seen = link in self.seen_jobs - if link_seen: - logger.debug(f"Already applied to job: {job_title} at {company}, skipping...") - return link_seen - - def is_already_applied_to_company(self, company): - if not self.apply_once_at_company: - return False - - output_files = ["success.json"] - for file_name in output_files: - file_path = self.output_file_directory / file_name - if file_path.exists(): - with open(file_path, 'r', encoding='utf-8') as f: - try: - existing_data = json.load(f) - for applied_job in existing_data: - if applied_job['company'].strip().lower() == company.strip().lower(): - logger.debug( - f"Already applied at {company} (once per company policy), skipping...") - return True - except json.JSONDecodeError: - continue - return False - - def is_previously_failed_to_apply(self, link): - file_name = "failed" - file_path = self.output_file_directory / f"{file_name}.json" - - if not file_path.exists(): - with open(file_path, "w", encoding="utf-8") as f: - json.dump([], f) - - with open(file_path, 'r', encoding='utf-8') as f: - try: - existing_data = json.load(f) - except json.JSONDecodeError: - logger.error(f"JSON decode error in file: {file_path}") - return False - - for data in existing_data: - data_link = data['link'] - if data_link == link: - return True - - return False diff --git a/src/ai_hawk/llm/llm_manager.py b/src/ai_hawk/libs/llm_manager.py similarity index 99% rename from src/ai_hawk/llm/llm_manager.py rename to src/ai_hawk/libs/llm_manager.py index ae627c364..c7db55f2e 100644 --- a/src/ai_hawk/llm/llm_manager.py +++ b/src/ai_hawk/libs/llm_manager.py @@ -19,7 +19,7 @@ import ai_hawk.llm.prompts as prompts from config import JOB_SUITABILITY_SCORE -from constants import ( +from src.utils.constants import ( AVAILABILITY, CERTIFICATIONS, CLAUDE, diff --git a/src/ai_hawk/libs/resume_and_cover_builder/__init__.py b/src/ai_hawk/libs/resume_and_cover_builder/__init__.py new file mode 100644 index 000000000..398492d17 --- /dev/null +++ b/src/ai_hawk/libs/resume_and_cover_builder/__init__.py @@ -0,0 +1,6 @@ +__version__ = '0.1' + +# Import all the necessary classes and functions, called when the package is imported +from .resume_generator import ResumeGenerator +from .style_manager import StyleManager +from .resume_facade import ResumeFacade \ No newline at end of file diff --git a/src/ai_hawk/libs/resume_and_cover_builder/config.py b/src/ai_hawk/libs/resume_and_cover_builder/config.py new file mode 100644 index 000000000..0f9f162ef --- /dev/null +++ b/src/ai_hawk/libs/resume_and_cover_builder/config.py @@ -0,0 +1,34 @@ +""" +This module is used to store the global configuration of the application. +""" +# app/libs/resume_and_cover_builder/config.py +from pathlib import Path + +class GlobalConfig: + def __init__(self): + self.STRINGS_MODULE_RESUME_PATH: Path = None + self.STRINGS_MODULE_RESUME_JOB_DESCRIPTION_PATH: Path = None + self.STRINGS_MODULE_COVER_LETTER_JOB_DESCRIPTION_PATH: Path = None + self.STRINGS_MODULE_NAME: str = None + self.STYLES_DIRECTORY: Path = None + self.LOG_OUTPUT_FILE_PATH: Path = None + self.API_KEY: str = None + self.html_template = """ + + + + + + Resume + + + + + + + $body + + + """ + +global_config = GlobalConfig() diff --git a/src/job_portals/__init__.py b/src/ai_hawk/libs/resume_and_cover_builder/cover_letter_prompt/__init__.py similarity index 100% rename from src/job_portals/__init__.py rename to src/ai_hawk/libs/resume_and_cover_builder/cover_letter_prompt/__init__.py diff --git a/src/ai_hawk/libs/resume_and_cover_builder/cover_letter_prompt/strings_feder-cr.py b/src/ai_hawk/libs/resume_and_cover_builder/cover_letter_prompt/strings_feder-cr.py new file mode 100644 index 000000000..5113fa6fe --- /dev/null +++ b/src/ai_hawk/libs/resume_and_cover_builder/cover_letter_prompt/strings_feder-cr.py @@ -0,0 +1,52 @@ +from src.ai_hawk.libs.resume_and_cover_builder.template_base import prompt_cover_letter_template + + +cover_letter_template = """ +Compose a brief and impactful cover letter based on the provided job description and resume. The letter should be no longer than three paragraphs and should be written in a professional, yet conversational tone. Avoid using any placeholders, and ensure that the letter flows naturally and is tailored to the job. + +Analyze the job description to identify key qualifications and requirements. Introduce the candidate succinctly, aligning their career objectives with the role. Highlight relevant skills and experiences from the resume that directly match the job’s demands, using specific examples to illustrate these qualifications. Reference notable aspects of the company, such as its mission or values, that resonate with the candidate’s professional goals. Conclude with a strong statement of why the candidate is a good fit for the position, expressing a desire to discuss further. + +Please write the cover letter in a way that directly addresses the job role and the company’s characteristics, ensuring it remains concise and engaging without unnecessary embellishments. The letter should be formatted into paragraphs and should not include a greeting or signature. + +## Rules: +- Do not include any introductions, explanations, or additional information. + +## Details : +- **Job Description:** +``` +{job_description} +``` +- **My resume:** +``` +{resume} +``` +"""+ prompt_cover_letter_template + + +summarize_prompt_template = """ +As a seasoned HR expert, your task is to identify and outline the key skills and requirements necessary for the position of this job. Use the provided job description as input to extract all relevant information. This will involve conducting a thorough analysis of the job's responsibilities and the industry standards. You should consider both the technical and soft skills needed to excel in this role. Additionally, specify any educational qualifications, certifications, or experiences that are essential. Your analysis should also reflect on the evolving nature of this role, considering future trends and how they might affect the required competencies. + +Rules: +Remove boilerplate text +Include only relevant information to match the job description against the resume + +# Analysis Requirements +Your analysis should include the following sections: +Technical Skills: List all the specific technical skills required for the role based on the responsibilities described in the job description. +Soft Skills: Identify the necessary soft skills, such as communication abilities, problem-solving, time management, etc. +Educational Qualifications and Certifications: Specify the essential educational qualifications and certifications for the role. +Professional Experience: Describe the relevant work experiences that are required or preferred. +Role Evolution: Analyze how the role might evolve in the future, considering industry trends and how these might influence the required skills. + +# Final Result: +Your analysis should be structured in a clear and organized document with distinct sections for each of the points listed above. Each section should contain: +This comprehensive overview will serve as a guideline for the recruitment process, ensuring the identification of the most qualified candidates. + +# Job Description: +``` +{text} +``` + +--- + +# Job Description Summary""" \ No newline at end of file diff --git a/src/ai_hawk/libs/resume_and_cover_builder/llm/llm_generate_cover_letter_from_job.py b/src/ai_hawk/libs/resume_and_cover_builder/llm/llm_generate_cover_letter_from_job.py new file mode 100644 index 000000000..071c45bb9 --- /dev/null +++ b/src/ai_hawk/libs/resume_and_cover_builder/llm/llm_generate_cover_letter_from_job.py @@ -0,0 +1,91 @@ +""" +This creates the cover letter (in html, utils will then convert in PDF) matching with job description and plain-text resume +""" +# app/libs/resume_and_cover_builder/llm_generate_cover_letter_from_job.py +import os +import textwrap +from ..utils import LoggerChatModel +from langchain_core.output_parsers import StrOutputParser +from langchain_core.prompts import ChatPromptTemplate +from langchain_openai import ChatOpenAI, OpenAIEmbeddings +from pathlib import Path +from dotenv import load_dotenv +from requests.exceptions import HTTPError as HTTPStatusError +from pathlib import Path +from loguru import logger + +# Load environment variables from .env file +load_dotenv() + +# Configure log file +log_folder = 'log/cover_letter/gpt_cover_letter_job_descr' +if not os.path.exists(log_folder): + os.makedirs(log_folder) +log_path = Path(log_folder).resolve() +logger.add(log_path / "gpt_cover_letter_job_descr.log", rotation="1 day", compression="zip", retention="7 days", level="DEBUG") + +class LLMCoverLetterJobDescription: + def __init__(self, openai_api_key, strings): + self.llm_cheap = LoggerChatModel(ChatOpenAI(model_name="gpt-4o-mini", openai_api_key=openai_api_key, temperature=0.4)) + self.llm_embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key) + self.strings = strings + + @staticmethod + def _preprocess_template_string(template: str) -> str: + """ + Preprocess the template string by removing leading whitespace and indentation. + Args: + template (str): The template string to preprocess. + Returns: + str: The preprocessed template string. + """ + return textwrap.dedent(template) + + def set_resume(self, resume) -> None: + """ + Set the resume text to be used for generating the cover letter. + Args: + resume (str): The plain text resume to be used. + """ + self.resume = resume + + def set_job_description_from_text(self, job_description_text) -> None: + """ + Set the job description text to be used for generating the cover letter. + Args: + job_description_text (str): The plain text job description to be used. + """ + logger.debug("Starting job description summarization...") + prompt = ChatPromptTemplate.from_template(self.strings.summarize_prompt_template) + chain = prompt | self.llm_cheap | StrOutputParser() + output = chain.invoke({"text": job_description_text}) + self.job_description = output + logger.debug(f"Job description summarization complete: {self.job_description}") + + def generate_cover_letter(self) -> str: + """ + Generate the cover letter based on the job description and resume. + Returns: + str: The generated cover letter + """ + logger.debug("Starting cover letter generation...") + prompt_template = self._preprocess_template_string(self.strings.cover_letter_template) + logger.debug(f"Cover letter template after preprocessing: {prompt_template}") + + prompt = ChatPromptTemplate.from_template(prompt_template) + logger.debug(f"Prompt created: {prompt}") + + chain = prompt | self.llm_cheap | StrOutputParser() + logger.debug(f"Chain created: {chain}") + + input_data = { + "job_description": self.job_description, + "resume": self.resume + } + logger.debug(f"Input data: {input_data}") + + output = chain.invoke(input_data) + logger.debug(f"Cover letter generation result: {output}") + + logger.debug("Cover letter generation completed") + return output diff --git a/src/ai_hawk/libs/resume_and_cover_builder/llm/llm_generate_resume.py b/src/ai_hawk/libs/resume_and_cover_builder/llm/llm_generate_resume.py new file mode 100644 index 000000000..b324d362d --- /dev/null +++ b/src/ai_hawk/libs/resume_and_cover_builder/llm/llm_generate_resume.py @@ -0,0 +1,322 @@ +""" +Create a class that generates a resume based on a resume and a resume template. +""" +# app/libs/resume_and_cover_builder/gpt_resume.py +import os +import textwrap +from src.ai_hawk.libs.resume_and_cover_builder.utils import LoggerChatModel +from langchain_core.output_parsers import StrOutputParser +from langchain_core.prompts import ChatPromptTemplate +from langchain_openai import ChatOpenAI +from dotenv import load_dotenv +from concurrent.futures import ThreadPoolExecutor, as_completed +from loguru import logger +from pathlib import Path + +# Load environment variables from .env file +load_dotenv() + +# Configure log file +log_folder = 'log/resume/gpt_resume' +if not os.path.exists(log_folder): + os.makedirs(log_folder) +log_path = Path(log_folder).resolve() +logger.add(log_path / "gpt_resume.log", rotation="1 day", compression="zip", retention="7 days", level="DEBUG") + +class LLMResumer: + def __init__(self, openai_api_key, strings): + self.llm_cheap = LoggerChatModel( + ChatOpenAI( + model_name="gpt-4o-mini", openai_api_key=openai_api_key, temperature=0.4 + ) + ) + self.strings = strings + + @staticmethod + def _preprocess_template_string(template: str) -> str: + """ + Preprocess the template string by removing leading whitespace and indentation. + Args: + template (str): The template string to preprocess. + Returns: + str: The preprocessed template string. + """ + return textwrap.dedent(template) + + def set_resume(self, resume) -> None: + """ + Set the resume object to be used for generating the resume. + Args: + resume (Resume): The resume object to be used. + """ + self.resume = resume + + def generate_header(self, data = None) -> str: + """ + Generate the header section of the resume. + Args: + data (dict): The personal information to use for generating the header. + Returns: + str: The generated header section. + """ + header_prompt_template = self._preprocess_template_string( + self.strings.prompt_header + ) + prompt = ChatPromptTemplate.from_template(header_prompt_template) + chain = prompt | self.llm_cheap | StrOutputParser() + input_data = { + "personal_information": self.resume.personal_information + } if data is None else data + output = chain.invoke(input_data) + return output + + def generate_education_section(self, data = None) -> str: + """ + Generate the education section of the resume. + Args: + data (dict): The education details to use for generating the education section. + Returns: + str: The generated education section. + """ + logger.debug("Starting education section generation") + + education_prompt_template = self._preprocess_template_string(self.strings.prompt_education) + logger.debug(f"Education template: {education_prompt_template}") + + prompt = ChatPromptTemplate.from_template(education_prompt_template) + logger.debug(f"Prompt: {prompt}") + + chain = prompt | self.llm_cheap | StrOutputParser() + logger.debug(f"Chain created: {chain}") + + input_data = { + "education_details": self.resume.education_details + } if data is None else data + output = chain.invoke(input_data) + logger.debug(f"Chain invocation result: {output}") + + logger.debug("Education section generation completed") + return output + + def generate_work_experience_section(self, data = None) -> str: + """ + Generate the work experience section of the resume. + Args: + data (dict): The work experience details to use for generating the work experience section. + Returns: + str: The generated work experience section. + """ + logger.debug("Starting work experience section generation") + + work_experience_prompt_template = self._preprocess_template_string(self.strings.prompt_working_experience) + logger.debug(f"Work experience template: {work_experience_prompt_template}") + + prompt = ChatPromptTemplate.from_template(work_experience_prompt_template) + logger.debug(f"Prompt: {prompt}") + + chain = prompt | self.llm_cheap | StrOutputParser() + logger.debug(f"Chain created: {chain}") + + input_data = { + "experience_details": self.resume.experience_details + } if data is None else data + output = chain.invoke(input_data) + logger.debug(f"Chain invocation result: {output}") + + logger.debug("Work experience section generation completed") + return output + + def generate_projects_section(self, data = None) -> str: + """ + Generate the side projects section of the resume. + Args: + data (dict): The side projects to use for generating the side projects section. + Returns: + str: The generated side projects section. + """ + logger.debug("Starting side projects section generation") + + projects_prompt_template = self._preprocess_template_string(self.strings.prompt_projects) + logger.debug(f"Side projects template: {projects_prompt_template}") + + prompt = ChatPromptTemplate.from_template(projects_prompt_template) + logger.debug(f"Prompt: {prompt}") + + chain = prompt | self.llm_cheap | StrOutputParser() + logger.debug(f"Chain created: {chain}") + + input_data = { + "projects": self.resume.projects + } if data is None else data + output = chain.invoke(input_data) + logger.debug(f"Chain invocation result: {output}") + + logger.debug("Side projects section generation completed") + return output + + def generate_achievements_section(self, data = None) -> str: + """ + Generate the achievements section of the resume. + Args: + data (dict): The achievements to use for generating the achievements section. + Returns: + str: The generated achievements section. + """ + logger.debug("Starting achievements section generation") + + achievements_prompt_template = self._preprocess_template_string(self.strings.prompt_achievements) + logger.debug(f"Achievements template: {achievements_prompt_template}") + + prompt = ChatPromptTemplate.from_template(achievements_prompt_template) + logger.debug(f"Prompt: {prompt}") + + chain = prompt | self.llm_cheap | StrOutputParser() + logger.debug(f"Chain created: {chain}") + + input_data = { + "achievements": self.resume.achievements, + "certifications": self.resume.certifications, + } if data is None else data + logger.debug(f"Input data for the chain: {input_data}") + + output = chain.invoke(input_data) + logger.debug(f"Chain invocation result: {output}") + + logger.debug("Achievements section generation completed") + return output + + def generate_certifications_section(self, data = None) -> str: + """ + Generate the certifications section of the resume. + Returns: + str: The generated certifications section. + """ + logger.debug("Starting Certifications section generation") + + certifications_prompt_template = self._preprocess_template_string(self.strings.prompt_certifications) + logger.debug(f"Certifications template: {certifications_prompt_template}") + + prompt = ChatPromptTemplate.from_template(certifications_prompt_template) + logger.debug(f"Prompt: {prompt}") + + chain = prompt | self.llm_cheap | StrOutputParser() + logger.debug(f"Chain created: {chain}") + + input_data = { + "certifications": self.resume.certifications + } if data is None else data + logger.debug(f"Input data for the chain: {input_data}") + + output = chain.invoke(input_data) + logger.debug(f"Chain invocation result: {output}") + + logger.debug("Certifications section generation completed") + return output + + def generate_additional_skills_section(self, data = None) -> str: + """ + Generate the additional skills section of the resume. + Returns: + str: The generated additional skills section. + """ + additional_skills_prompt_template = self._preprocess_template_string(self.strings.prompt_additional_skills) + + skills = set() + if self.resume.experience_details: + for exp in self.resume.experience_details: + if exp.skills_acquired: + skills.update(exp.skills_acquired) + + if self.resume.education_details: + for edu in self.resume.education_details: + if edu.exam: + for exam in edu.exam: + skills.update(exam.keys()) + prompt = ChatPromptTemplate.from_template(additional_skills_prompt_template) + chain = prompt | self.llm_cheap | StrOutputParser() + input_data = { + "languages": self.resume.languages, + "interests": self.resume.interests, + "skills": skills, + } if data is None else data + output = chain.invoke(input_data) + + return output + + def generate_html_resume(self) -> str: + """ + Generate the full HTML resume based on the resume object. + Returns: + str: The generated HTML resume. + """ + def header_fn(): + if self.resume.personal_information: + return self.generate_header() + return "" + + def education_fn(): + if self.resume.education_details: + return self.generate_education_section() + return "" + + def work_experience_fn(): + if self.resume.experience_details: + return self.generate_work_experience_section() + return "" + + def projects_fn(): + if self.resume.projects: + return self.generate_projects_section() + return "" + + def achievements_fn(): + if self.resume.achievements: + return self.generate_achievements_section() + return "" + + def certifications_fn(): + if self.resume.certifications: + return self.generate_certifications_section() + return "" + + def additional_skills_fn(): + if (self.resume.experience_details or self.resume.education_details or + self.resume.languages or self.resume.interests): + return self.generate_additional_skills_section() + return "" + + # Create a dictionary to map the function names to their respective callables + functions = { + "header": header_fn, + "education": education_fn, + "work_experience": work_experience_fn, + "projects": projects_fn, + "achievements": achievements_fn, + "certifications": certifications_fn, + "additional_skills": additional_skills_fn, + } + + # Use ThreadPoolExecutor to run the functions in parallel + with ThreadPoolExecutor() as executor: + future_to_section = {executor.submit(fn): section for section, fn in functions.items()} + results = {} + for future in as_completed(future_to_section): + section = future_to_section[future] + try: + result = future.result() + if result: + results[section] = result + except Exception as exc: + logger.error(f'{section} raised an exception: {exc}') + full_resume = "\n" + full_resume += f" {results.get('header', '')}\n" + full_resume += "
\n" + full_resume += f" {results.get('education', '')}\n" + full_resume += f" {results.get('work_experience', '')}\n" + full_resume += f" {results.get('projects', '')}\n" + full_resume += f" {results.get('achievements', '')}\n" + full_resume += f" {results.get('certifications', '')}\n" + full_resume += f" {results.get('additional_skills', '')}\n" + full_resume += "
\n" + full_resume += "" + return full_resume diff --git a/src/ai_hawk/libs/resume_and_cover_builder/llm/llm_generate_resume_from_job.py b/src/ai_hawk/libs/resume_and_cover_builder/llm/llm_generate_resume_from_job.py new file mode 100644 index 000000000..8ab861a61 --- /dev/null +++ b/src/ai_hawk/libs/resume_and_cover_builder/llm/llm_generate_resume_from_job.py @@ -0,0 +1,134 @@ +""" +Create a class that generates a job description based on a resume and a job description template. +""" +# app/libs/resume_and_cover_builder/llm_generate_resume_from_job.py +import os +from src.ai_hawk.libs.resume_and_cover_builder.llm.llm_generate_resume import LLMResumer +from src.ai_hawk.libs.resume_and_cover_builder.utils import LoggerChatModel +from langchain_core.output_parsers import StrOutputParser +from langchain_core.prompts import ChatPromptTemplate +from langchain_openai import ChatOpenAI +from dotenv import load_dotenv +from loguru import logger +from pathlib import Path + +# Load environment variables from .env file +load_dotenv() + +log_folder = 'log/resume/gpt_resum_job_descr' +if not os.path.exists(log_folder): + os.makedirs(log_folder) +log_path = Path(log_folder).resolve() +logger.add(log_path / "gpt_resum_job_descr.log", rotation="1 day", compression="zip", retention="7 days", level="DEBUG") + +class LLMResumeJobDescription(LLMResumer): + def __init__(self, openai_api_key, strings): + super().__init__(openai_api_key, strings) + + def set_job_description_from_text(self, job_description_text) -> None: + """ + Set the job description text to be used for generating the resume. + Args: + job_description_text (str): The plain text job description to be used. + """ + prompt = ChatPromptTemplate.from_template(self.strings.summarize_prompt_template) + chain = prompt | self.llm_cheap | StrOutputParser() + output = chain.invoke({"text": job_description_text}) + self.job_description = output + + def generate_header(self) -> str: + """ + Generate the header section of the resume. + Returns: + str: The generated header section. + """ + return super().generate_header(data={ + "personal_information": self.resume.personal_information, + "job_description": self.job_description + }) + + def generate_education_section(self) -> str: + """ + Generate the education section of the resume. + Returns: + str: The generated education section. + """ + return super().generate_education_section(data={ + "education_details": self.resume.education_details, + "job_description": self.job_description + }) + + def generate_work_experience_section(self) -> str: + """ + Generate the work experience section of the resume. + Returns: + str: The generated work experience section. + """ + return super().generate_work_experience_section(data={ + "experience_details": self.resume.experience_details, + "job_description": self.job_description + }) + + def generate_projects_section(self) -> str: + """ + Generate the side projects section of the resume. + Returns: + str: The generated side projects section. + """ + return super().generate_projects_section(data={ + "projects": self.resume.projects, + "job_description": self.job_description + }) + + def generate_achievements_section(self) -> str: + """ + Generate the achievements section of the resume. + Returns: + str: The generated achievements section. + """ + return super().generate_achievements_section(data={ + "achievements": self.resume.achievements, + "job_description": self.job_description + }) + + + def generate_certifications_section(self) -> str: + """ + Generate the certifications section of the resume. + Returns: + str: The generated certifications section. + """ + return super().generate_certifications_section(data={ + "certifications": self.resume.certifications, + "job_description": self.job_description + }) + + def generate_additional_skills_section(self) -> str: + """ + Generate the additional skills section of the resume. + Returns: + str: The generated additional skills section. + """ + additional_skills_prompt_template = self._preprocess_template_string( + self.strings.prompt_additional_skills + ) + skills = set() + if self.resume.experience_details: + for exp in self.resume.experience_details: + if exp.skills_acquired: + skills.update(exp.skills_acquired) + + if self.resume.education_details: + for edu in self.resume.education_details: + if edu.exam: + for exam in edu.exam: + skills.update(exam.keys()) + prompt = ChatPromptTemplate.from_template(additional_skills_prompt_template) + chain = prompt | self.llm_cheap | StrOutputParser() + output = chain.invoke({ + "languages": self.resume.languages, + "interests": self.resume.interests, + "skills": skills, + "job_description": self.job_description + }) + return output diff --git a/src/ai_hawk/libs/resume_and_cover_builder/module_loader.py b/src/ai_hawk/libs/resume_and_cover_builder/module_loader.py new file mode 100644 index 000000000..a918a1098 --- /dev/null +++ b/src/ai_hawk/libs/resume_and_cover_builder/module_loader.py @@ -0,0 +1,13 @@ +""" +This module is used to store the global configuration of the application. +""" +# app/libs/resume_and_cover_builder/module_loader.py +import importlib +import sys + +def load_module(module_path: str, module_name: str): + spec = importlib.util.spec_from_file_location(module_name, module_path) + module = importlib.util.module_from_spec(spec) + sys.modules[module_name] = module + spec.loader.exec_module(module) + return module \ No newline at end of file diff --git a/src/ai_hawk/libs/resume_and_cover_builder/resume_facade.py b/src/ai_hawk/libs/resume_and_cover_builder/resume_facade.py new file mode 100644 index 000000000..39491c407 --- /dev/null +++ b/src/ai_hawk/libs/resume_and_cover_builder/resume_facade.py @@ -0,0 +1,111 @@ +""" +This module contains the FacadeManager class, which is responsible for managing the interaction between the user and the other components of the application. +""" +# app/libs/resume_and_cover_builder/manager_facade.py +import logging +import os + +import inquirer +from pathlib import Path + +from src.utils.chrome_utils import HTML_to_PDF +from .config import global_config + +class ResumeFacade: + def __init__(self, api_key, style_manager, resume_generator, resume_object, output_path): + """ + Initialize the FacadeManager with the given API key, style manager, resume generator, resume object, and log path. + Args: + api_key (str): The OpenAI API key to be used for generating text. + style_manager (StyleManager): The StyleManager instance to manage the styles. + resume_generator (ResumeGenerator): The ResumeGenerator instance to generate resumes and cover letters. + resume_object (str): The resume object to be used for generating resumes and cover letters. + output_path (str): The path to the log file. + """ + lib_directory = Path(__file__).resolve().parent + global_config.STRINGS_MODULE_RESUME_PATH = lib_directory / "resume_prompt/strings_feder-cr.py" + global_config.STRINGS_MODULE_RESUME_JOB_DESCRIPTION_PATH = lib_directory / "resume_job_description_prompt/strings_feder-cr.py" + global_config.STRINGS_MODULE_COVER_LETTER_JOB_DESCRIPTION_PATH = lib_directory / "cover_letter_prompt/strings_feder-cr.py" + global_config.STRINGS_MODULE_NAME = "strings_feder_cr" + global_config.STYLES_DIRECTORY = lib_directory / "resume_style" + global_config.LOG_OUTPUT_FILE_PATH = output_path + global_config.API_KEY = api_key + self.style_manager = style_manager + self.resume_generator = resume_generator + self.resume_generator.set_resume_object(resume_object) + self.selected_style = None # Proprietà per memorizzare lo stile selezionato + + def set_driver(self, driver): + self.driver = driver + + def prompt_user(self, choices: list[str], message: str) -> str: + """ + Prompt the user with the given message and choices. + Args: + choices (list[str]): The list of choices to present to the user. + message (str): The message to display to the user. + Returns: + str: The choice selected by the user. + """ + questions = [ + inquirer.List('selection', message=message, choices=choices), + ] + return inquirer.prompt(questions)['selection'] + + def prompt_for_text(self, message: str) -> str: + """ + Prompt the user to enter text with the given message. + Args: + message (str): The message to display to the user. + Returns: + str: The text entered by the user. + """ + questions = [ + inquirer.Text('text', message=message), + ] + return inquirer.prompt(questions)['text'] + + def choose_style(self) -> None: + """ + Prompt the user to choose a style for the resume. + """ + styles = self.style_manager.get_styles() + if not styles: + print("No styles available") + return None + formatted_choices = self.style_manager.format_choices(styles) + selected_choice = self.prompt_user(formatted_choices, "Which style would you like to adopt?") + self.selected_style = selected_choice.split(' (')[0] + + def create_resume_pdf(self, job_description_text=None) -> bytes: + """ + Create a resume PDF using the selected style and the given job description text. + Args: + job_description_text (str): The job description text to include in the resume. + Returns: + bytes: The PDF content as bytes. + """ + if self.selected_style is None: + raise ValueError("Devi scegliere uno stile prima di generare il PDF.") + + style_path = self.style_manager.get_style_path(self.selected_style) + + if job_description_text is None: + html_resume = self.resume_generator.create_resume(style_path) + else: + html_resume = self.resume_generator.create_resume_job_description_text(style_path, job_description_text) + result = HTML_to_PDF(html_resume, self.driver) + self.driver.quit() + return result + + def create_cover_letter(self, job_description_text: str) -> None: + """ + Create a cover letter based on the given job description text and format. + Args: + job_description_text (str): The job description text to include in the cover letter. + """ + style_path = self.style_manager.get_style_path() + cover_letter_html = self.resume_generator.create_cover_letter_job_description(style_path, job_description_text) + result = HTML_to_PDF(cover_letter_html, self.driver) + self.driver.quit() + return result diff --git a/src/ai_hawk/libs/resume_and_cover_builder/resume_generator.py b/src/ai_hawk/libs/resume_and_cover_builder/resume_generator.py new file mode 100644 index 000000000..6cd5a5816 --- /dev/null +++ b/src/ai_hawk/libs/resume_and_cover_builder/resume_generator.py @@ -0,0 +1,44 @@ +""" +This module is responsible for generating resumes and cover letters using the LLM model. +""" +# app/libs/resume_and_cover_builder/resume_generator.py +from string import Template +from typing import Any +from src.ai_hawk.libs.resume_and_cover_builder.llm.llm_generate_resume import LLMResumer +from src.ai_hawk.libs.resume_and_cover_builder.llm.llm_generate_resume_from_job import LLMResumeJobDescription +from src.ai_hawk.libs.resume_and_cover_builder.llm.llm_generate_cover_letter_from_job import LLMCoverLetterJobDescription +from .module_loader import load_module +from .config import global_config + +class ResumeGenerator: + def __init__(self): + pass + + def set_resume_object(self, resume_object): + self.resume_object = resume_object + + + def _create_resume(self, gpt_answerer: Any, style_path): + gpt_answerer.set_resume(self.resume_object) + template = Template(global_config.html_template) + return template.substitute(body=gpt_answerer.generate_html_resume(), style_path=style_path) + + def create_resume(self, style_path): + strings = load_module(global_config.STRINGS_MODULE_RESUME_PATH, global_config.STRINGS_MODULE_NAME) + gpt_answerer = LLMResumer(global_config.API_KEY, strings) + return self._create_resume(gpt_answerer, style_path) + + def create_resume_job_description_text(self, style_path: str, job_description_text: str): + strings = load_module(global_config.STRINGS_MODULE_RESUME_JOB_DESCRIPTION_PATH, global_config.STRINGS_MODULE_NAME) + gpt_answerer = LLMResumeJobDescription(global_config.API_KEY, strings) + gpt_answerer.set_job_description_from_text(job_description_text) + return self._create_resume(gpt_answerer, style_path) + + def create_cover_letter_job_description(self, style_path: str, job_description_text: str): + strings = load_module(global_config.STRINGS_MODULE_COVER_LETTER_JOB_DESCRIPTION_PATH, global_config.STRINGS_MODULE_NAME) + gpt_answerer = LLMCoverLetterJobDescription(global_config.API_KEY, strings) + gpt_answerer.set_resume(self.resume_object) + gpt_answerer.set_job_description_from_text(job_description_text) + cover_letter_html = gpt_answerer.generate_cover_letter() + template = Template(global_config.html_template) + return template.substitute(body=cover_letter_html, style_path=style_path) \ No newline at end of file diff --git a/tests/__init__.py b/src/ai_hawk/libs/resume_and_cover_builder/resume_job_description_prompt/__init__.py similarity index 100% rename from tests/__init__.py rename to src/ai_hawk/libs/resume_and_cover_builder/resume_job_description_prompt/__init__.py diff --git a/src/ai_hawk/libs/resume_and_cover_builder/resume_job_description_prompt/strings_feder-cr.py b/src/ai_hawk/libs/resume_and_cover_builder/resume_job_description_prompt/strings_feder-cr.py new file mode 100644 index 000000000..28420e1af --- /dev/null +++ b/src/ai_hawk/libs/resume_and_cover_builder/resume_job_description_prompt/strings_feder-cr.py @@ -0,0 +1,169 @@ +from libs.resume_and_cover_builder.template_base import prompt_header_template, prompt_education_template, prompt_working_experience_template, prompt_projects_template, prompt_additional_skills_template, prompt_certifications_template, prompt_achievements_template + +prompt_header = """ +Act as an HR expert and resume writer specializing in ATS-friendly resumes. Your task is to create a professional and polished header for the resume. The header should: + +1. **Contact Information**: Include your full name, city and country, phone number, email address, LinkedIn profile, and GitHub profile. +2. **Formatting**: Ensure the contact details are presented clearly and are easy to read. + +To implement this: +- If any of the contact information fields (e.g., LinkedIn profile, GitHub profile) are not provided (i.e., `None`), omit them from the header. + +- **My information:** + {personal_information} +""" + prompt_header_template + +prompt_education = """ +Act as an HR expert and resume writer with a specialization in creating ATS-friendly resumes. Your task is to articulate the educational background for a resume, ensuring it aligns with the provided job description. For each educational entry, ensure you include: + +1. **Institution Name and Location**: Specify the university or educational institution’s name and location. +2. **Degree and Field of Study**: Clearly indicate the degree earned and the field of study. +3. **Grade**: Include your Grade if it is strong and relevant. +4. **Relevant Coursework**: List key courses with their grades to showcase your academic strengths. If no coursework is provided, omit this section from the template. + +To implement this, follow these steps: +- If the exam details are not provided (i.e., `None`), skip the coursework section when filling out the template. +- If the exam details are available, fill out the coursework section accordingly. + + +- **My information:** + {education_details} + +- **Job Description:** + {job_description} +"""+ prompt_education_template + + +prompt_working_experience = """ +Act as an HR expert and resume writer with a specialization in creating ATS-friendly resumes. Your task is to detail the work experience for a resume, ensuring it aligns with the provided job description. For each job entry, ensure you include: + +1. **Company Name and Location**: Provide the name of the company and its location. +2. **Job Title**: Clearly state your job title. +3. **Dates of Employment**: Include the start and end dates of your employment. +4. **Responsibilities and Achievements**: Describe your key responsibilities and notable achievements, emphasizing measurable results and specific contributions. + +Ensure that the descriptions highlight relevant experience and align with the job description. + +To implement this: +- If any of the work experience details (e.g., responsibilities, achievements) are not provided (i.e., `None`), omit those sections when filling out the template. + + +- **My information:** + {experience_details} + +- **Job Description:** + {job_description} +"""+ prompt_working_experience_template + + +prompt_projects = """ +Act as an HR expert and resume writer with a specialization in creating ATS-friendly resumes. Your task is to highlight notable side projects based on the provided job description. For each project, ensure you include: + +1. **Project Name and Link**: Provide the name of the project and include a link to the GitHub repository or project page. +2. **Project Details**: Describe any notable recognition or achievements related to the project, such as GitHub stars or community feedback. +3. **Technical Contributions**: Highlight your specific contributions and the technologies used in the project. + +Ensure that the project descriptions demonstrate your skills and achievements relevant to the job description. + +To implement this: +- If any of the project details (e.g., link, achievements) are not provided (i.e., `None`), omit those sections when filling out the template. + + +- **My information:** + {projects} + +- **Job Description:** + {job_description} +"""+ prompt_projects_template + + +prompt_achievements = """ +Act as an HR expert and resume writer with a specialization in creating ATS-friendly resumes. Your task is to list significant achievements based on the provided job description. For each achievement, ensure you include: + +1. **Award or Recognition**: Clearly state the name of the award, recognition, scholarship, or honor. +2. **Description**: Provide a brief description of the achievement and its relevance to your career or academic journey. + +Ensure that the achievements are clearly presented and effectively highlight your accomplishments. + +To implement this: +- If any of the achievement details (e.g., certifications, descriptions) are not provided (i.e., `None`), omit those sections when filling out the template. + + +- **My information:** + {achievements} + +- **Job Description:** + {job_description} +"""+ prompt_achievements_template + + +prompt_certifications = """ +Act as an HR expert and resume writer with a specialization in creating ATS-friendly resumes. Your task is to list significant certifications based on the provided details. For each certification, ensure you include: + +1. **Certification Name**: Clearly state the name of the certification. +2. **Description**: Provide a brief description of the certification and its relevance to your professional or academic career. + +Ensure that the certifications are clearly presented and effectively highlight your qualifications. + +To implement this: + +If any of the certification details (e.g., descriptions) are not provided (i.e., None), omit those sections when filling out the template. + +- **My information:** + {certifications} + +- **Job Description:** + {job_description} +"""+ prompt_certifications_template + + +prompt_additional_skills = """ +Act as an HR expert and resume writer with a specialization in creating ATS-friendly resumes. Your task is to list additional skills relevant to the job. For each skill, ensure you include: +Do not add any information beyond what is listed in the provided data fields. Only use the information provided in the 'languages', 'interests', and 'skills' fields to formulate your responses. Avoid extrapolating or incorporating details from the job description or other external sources. + +1. **Skill Category**: Clearly state the category or type of skill. +2. **Specific Skills**: List the specific skills or technologies within each category. +3. **Proficiency and Experience**: Briefly describe your experience and proficiency level. + +Ensure that the skills listed are relevant and accurately reflect your expertise in the field. + +To implement this: +- If any of the skill details (e.g., languages, interests, skills) are not provided (i.e., `None`), omit those sections when filling out the template. + + +- **My information:** + {languages} + {interests} + {skills} + +- **Job Description:** + {job_description} +"""+ prompt_additional_skills_template + +summarize_prompt_template = """ +As a seasoned HR expert, your task is to identify and outline the key skills and requirements necessary for the position of this job. Use the provided job description as input to extract all relevant information. This will involve conducting a thorough analysis of the job's responsibilities and the industry standards. You should consider both the technical and soft skills needed to excel in this role. Additionally, specify any educational qualifications, certifications, or experiences that are essential. Your analysis should also reflect on the evolving nature of this role, considering future trends and how they might affect the required competencies. + +Rules: +Remove boilerplate text +Include only relevant information to match the job description against the resume + +# Analysis Requirements +Your analysis should include the following sections: +Technical Skills: List all the specific technical skills required for the role based on the responsibilities described in the job description. +Soft Skills: Identify the necessary soft skills, such as communication abilities, problem-solving, time management, etc. +Educational Qualifications and Certifications: Specify the essential educational qualifications and certifications for the role. +Professional Experience: Describe the relevant work experiences that are required or preferred. +Role Evolution: Analyze how the role might evolve in the future, considering industry trends and how these might influence the required skills. + +# Final Result: +Your analysis should be structured in a clear and organized document with distinct sections for each of the points listed above. Each section should contain: +This comprehensive overview will serve as a guideline for the recruitment process, ensuring the identification of the most qualified candidates. + +# Job Description: +``` +{text} +``` + +--- + +# Job Description Summary""" diff --git a/src/job_portals/linkedIn/__init__py b/src/ai_hawk/libs/resume_and_cover_builder/resume_prompt/__init__.py similarity index 100% rename from src/job_portals/linkedIn/__init__py rename to src/ai_hawk/libs/resume_and_cover_builder/resume_prompt/__init__.py diff --git a/src/ai_hawk/libs/resume_and_cover_builder/resume_prompt/strings_feder-cr.py b/src/ai_hawk/libs/resume_and_cover_builder/resume_prompt/strings_feder-cr.py new file mode 100644 index 000000000..53b16189c --- /dev/null +++ b/src/ai_hawk/libs/resume_and_cover_builder/resume_prompt/strings_feder-cr.py @@ -0,0 +1,92 @@ +from libs.resume_and_cover_builder.template_base import * + +prompt_header = """ +Act as an HR expert and resume writer specializing in ATS-friendly resumes. Your task is to create a professional and polished header for the resume. The header should: + +1. **Contact Information**: Include your full name, city and country, phone number, email address, LinkedIn profile, and GitHub profile. Exclude any information that is not provided. +2. **Formatting**: Ensure the contact details are presented clearly and are easy to read. + +- **My information:** + {personal_information} +""" + prompt_header_template + + +prompt_education = """ +Act as an HR expert and resume writer with a specialization in creating ATS-friendly resumes. Your task is to articulate the educational background for a resume. For each educational entry, ensure you include: + +1. **Institution Name and Location**: Specify the university or educational institution’s name and location. +2. **Degree and Field of Study**: Clearly indicate the degree earned and the field of study. +3. **Grade**: Include your Grade if it is strong and relevant. +4. **Relevant Coursework**: List key courses with their grades to showcase your academic strengths. + +- **My information:** + {education_details} +"""+ prompt_education_template + + +prompt_working_experience = """ +Act as an HR expert and resume writer with a specialization in creating ATS-friendly resumes. Your task is to detail the work experience for a resume. For each job entry, ensure you include: + +1. **Company Name and Location**: Provide the name of the company and its location. +2. **Job Title**: Clearly state your job title. +3. **Dates of Employment**: Include the start and end dates of your employment. +4. **Responsibilities and Achievements**: Describe your key responsibilities and notable achievements, emphasizing measurable results and specific contributions. + +- **My information:** + {experience_details} +"""+ prompt_working_experience_template + + +prompt_projects = """ +Act as an HR expert and resume writer with a specialization in creating ATS-friendly resumes. Your task is to highlight notable side projects. For each project, ensure you include: + +1. **Project Name and Link**: Provide the name of the project and include a link to the GitHub repository or project page. +2. **Project Details**: Describe any notable recognition or achievements related to the project, such as GitHub stars or community feedback. +3. **Technical Contributions**: Highlight your specific contributions and the technologies used in the project. + +- **My information:** + {projects} +"""+ prompt_projects_template + + +prompt_achievements = """ +Act as an HR expert and resume writer with a specialization in creating ATS-friendly resumes. Your task is to list significant achievements. For each achievement, ensure you include: + +1. **Award or Recognition**: Clearly state the name of the award, recognition, scholarship, or honor. +2. **Description**: Provide a brief description of the achievement and its relevance to your career or academic journey. + +- **My information:** + {achievements} +"""+ prompt_achievements_template + + +prompt_certifications = """ +Act as an HR expert and resume writer with a specialization in creating ATS-friendly resumes. Your task is to list significant certifications based on the provided details. For each certification, ensure you include: + +1. **Certification Name**: Clearly state the name of the certification. +2. **Description**: Provide a brief description of the certification and its relevance to your professional or academic career. + +Ensure that the certifications are clearly presented and effectively highlight your qualifications. + +To implement this: + +If any of the certification details (e.g., descriptions) are not provided (i.e., None), omit those sections when filling out the template. + +- **My information:** + {certifications} + +"""+ prompt_certifications_template + + +prompt_additional_skills = """ +Act as an HR expert and resume writer with a specialization in creating ATS-friendly resumes. Your task is to list additional skills relevant to the job. For each skill, ensure you include: + +1. **Skill Category**: Clearly state the category or type of skill. +2. **Specific Skills**: List the specific skills or technologies within each category. +3. **Proficiency and Experience**: Briefly describe your experience and proficiency level. + +- **My information:** + {languages} + {interests} + {skills} +"""+ prompt_additional_skills_template diff --git a/src/ai_hawk/libs/resume_and_cover_builder/resume_style/__init__.py b/src/ai_hawk/libs/resume_and_cover_builder/resume_style/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/ai_hawk/libs/resume_and_cover_builder/resume_style/style_cloyola.css b/src/ai_hawk/libs/resume_and_cover_builder/resume_style/style_cloyola.css new file mode 100644 index 000000000..58cef557e --- /dev/null +++ b/src/ai_hawk/libs/resume_and_cover_builder/resume_style/style_cloyola.css @@ -0,0 +1,149 @@ +/*Cloyola Grey $https://github.com/cloyola*/ +@import url('https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;600;700&display=swap'); +@import url('https://fonts.googleapis.com/css2?family=Roboto:ital,wght@0,100;0,300;0,400;0,500;0,700;0,900;1,100;1,300;1,400;1,500;1,700;1,900&display=swap'); + +body { + font-family: 'Roboto', sans-serif; + line-height: 1.4; + color: #333; + max-width: 700px; + margin: 0 auto; + padding: 10px; + font-size: 9pt; +} + +header { + text-align: left; + margin-bottom: 20px; + background-color: #7c7c7c40; + padding: 20px; + border-radius: 8px; + box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); +} + +h1 { + font-size: 18pt; + font-weight: 700; + margin: 0 0 5px 0; +} + +.contact-info { + display: flex; + justify-content: left; + flex-wrap: wrap; + gap: 10px; + font-size: 9pt; + font-weight: normal; +} + +.contact-info p { + margin: 0; +} + +.contact-info a { + color: #0077b5; + text-decoration: none; +} + +.fab, +.fas { + margin-right: 3px; +} + +span.entry-location { + font-weight: normal; +} + +h2 { + font-size: 14pt; + font-weight: 600; + border-bottom: 1px dotted #4c4c4c; + padding-bottom: 2px; + margin: 10px 0 5px 0; + text-align: left; +} + +.entry { + margin-bottom: 15px; /*margin-bottom: 8px;*/ + background-color: #fff; + padding: 15px; + border-radius: 8px; + box-shadow: 3px 3px 5px 2px rgba(0, 0, 0, 0.2); +} + +.entry-header { + display: flex; + justify-content: space-between; + font-weight: 600; + font-size: 10pt; +} + +.entry-details { + display: flex; + justify-content: space-between; + font-style: italic; + margin-bottom: 2px; + font-size: 9pt; +} + +.compact-list { + margin: 2px 0; + padding-left: 15px; +} + +.compact-list li { + margin-bottom: 2px; +} + +.two-column { + display: flex; + justify-content: space-between; +} + +.two-column ul { + width: 48%; + margin: 0; + padding-left: 15px; + list-style-type: circle; +} + +a { + color: #0077b5; + text-decoration: none; +} + +a:hover { + text-decoration: underline; +} + +@media print { + body { + padding: 0; + margin: 0; + font-size: 9pt; + } + + @page { + margin: 0.5cm; + } + + h1 { + font-size: 18pt; + } + + h2 { + font-size: 11pt; + } + + .contact-info { + font-size: 8pt; + } + + .entry-details { + font-size: 7pt; + } + + .compact-list { + padding-left: 12px; + } +} diff --git a/src/ai_hawk/libs/resume_and_cover_builder/resume_style/style_josylad_blue.css b/src/ai_hawk/libs/resume_and_cover_builder/resume_style/style_josylad_blue.css new file mode 100644 index 000000000..ae5091e0e --- /dev/null +++ b/src/ai_hawk/libs/resume_and_cover_builder/resume_style/style_josylad_blue.css @@ -0,0 +1,166 @@ +/*Modern Blue$https://github.com/josylad*/ + +@import url('https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;600;700&display=swap'); + +body { + font-family: 'Poppins', sans-serif; + line-height: 1.6; + color: #2c3e50; + max-width: 850px; + margin: 0 auto; + padding: 20px; + font-size: 10pt; + background-color: #f9f9f9; +} + +header { + text-align: center; + margin-bottom: 20px; + background-color: #3498db; + padding: 20px; + border-radius: 8px; + box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); +} + +h1 { + font-size: 28pt; + font-weight: 700; + margin: 0 0 10px 0; + color: #fff; +} + +.contact-info { + display: flex; + justify-content: center; + flex-wrap: wrap; + gap: 15px; + font-size: 10pt; + font-weight: 300; + color: #ecf0f1; +} + +.contact-info p { + margin: 0; +} + +.contact-info a { + color: #ecf0f1; + text-decoration: none; + transition: color 0.3s ease; +} + +.contact-info a:hover { + color: #2c3e50; +} + +.fab, +.fas { + margin-right: 5px; +} + +h2 { + font-size: 18pt; + font-weight: 600; + border-bottom: 2px solid #3498db; + padding-bottom: 5px; + margin: 20px 0 15px 0; + color: #2c3e50; +} + +.entry { + margin-bottom: 15px; + background-color: #fff; + padding: 15px; + border-radius: 8px; + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); +} + +.entry-header { + display: flex; + justify-content: space-between; + font-weight: 600; + color: #3498db; +} + +.entry-details { + display: flex; + justify-content: space-between; + font-style: italic; + margin-bottom: 8px; + font-size: 9pt; + color: #7f8c8d; +} + +.compact-list { + margin: 5px 0; + padding-left: 20px; +} + +.compact-list li { + margin-bottom: 5px; +} + +.two-column { + display: flex; + justify-content: space-between; + flex-wrap: wrap; +} + +.two-column ul { + width: 48%; + margin: 0; + padding-left: 20px; +} + +a { + color: #3498db; + text-decoration: none; + transition: color 0.3s ease; +} + +a:hover { + color: #2980b9; + text-decoration: underline; +} + +@media print { + body { + padding: 0; + margin: 0; + font-size: 9pt; + background-color: #fff; + } + + @page { + margin: 1cm; + } + + h1 { + font-size: 24pt; + } + + h2 { + font-size: 16pt; + } + + .contact-info { + font-size: 9pt; + } + + .entry-details { + font-size: 8pt; + } + + .compact-list { + padding-left: 15px; + } + + header { + box-shadow: none; + } + + .entry { + box-shadow: none; + padding: 10px 0; + } +} \ No newline at end of file diff --git a/src/ai_hawk/libs/resume_and_cover_builder/resume_style/style_josylad_grey.css b/src/ai_hawk/libs/resume_and_cover_builder/resume_style/style_josylad_grey.css new file mode 100644 index 000000000..b56692f36 --- /dev/null +++ b/src/ai_hawk/libs/resume_and_cover_builder/resume_style/style_josylad_grey.css @@ -0,0 +1,208 @@ +/*Modern Grey$https://github.com/josylad*/ +@import url('https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;600;700&display=swap'); + +body { + font-family: 'Poppins', sans-serif; + line-height: 1.6; + color: #333; + max-width: 850px; + margin: 0 auto; + padding: 20px; + font-size: 10pt; + background-color: #f9f9f9; +} + +header { + text-align: center; + margin-bottom: 20px; + background-color: #4a4a4a; + padding: 20px; + border-radius: 8px; + box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); +} + +h1 { + font-size: 28pt; + font-weight: 700; + margin: 0 0 10px 0; + color: #fff; +} + +.contact-info { + display: flex; + justify-content: center; + flex-wrap: wrap; + gap: 15px; + font-size: 10pt; + font-weight: 300; + color: #e0e0e0; +} + +.contact-info p { + margin: 0; +} + +.contact-info a { + color: #e0e0e0; + text-decoration: none; + transition: color 0.3s ease; +} + +.contact-info a:hover { + color: #fff; +} + +.fab, +.fas { + margin-right: 5px; +} + +h2 { + font-size: 18pt; + font-weight: 600; + border-bottom: 2px solid #4a4a4a; + padding-bottom: 5px; + margin: 20px 0 15px 0; + color: #333; +} + +.entry { + margin-bottom: 15px; + background-color: #fff; + padding: 15px; + border-radius: 8px; + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); +} + +.entry-header { + display: flex; + justify-content: space-between; + font-weight: 600; + color: #4a4a4a; +} + +.entry-details { + display: flex; + justify-content: space-between; + font-style: italic; + margin-bottom: 8px; + font-size: 9pt; + color: #777; +} + +.compact-list { + margin: 5px 0; + padding-left: 20px; +} + +.compact-list li { + margin-bottom: 5px; +} + +.skills-section { + margin-top: 20px; +} + +.skills-section h2 { + font-size: 18pt; + font-weight: 600; + border-bottom: 2px solid #4a4a4a; + padding-bottom: 5px; + margin: 0 0 15px 0; + color: #333; + text-align: center; +} + +.skills-container { + display: flex; + justify-content: space-between; +} + +.skills-column { + width: 48%; +} + +.skills-list { + list-style-type: none; + padding: 0; + margin: 0; +} + +.skills-list li { + margin-bottom: 8px; + display: flex; + align-items: center; +} + +.skills-list li::before { + content: "•"; + color: #4a4a4a; + font-weight: bold; + display: inline-block; + width: 1em; + margin-right: 0.5em; +} + +a { + color: #4a4a4a; + text-decoration: none; + transition: color 0.3s ease; +} + +a:hover { + color: #333; + text-decoration: underline; +} + +@media print { + body { + padding: 0; + margin: 0; + font-size: 9pt; + background-color: #fff; + } + + @page { + margin: 1cm; + } + + h1 { + font-size: 24pt; + } + + h2 { + font-size: 16pt; + } + + .contact-info { + font-size: 9pt; + } + + .entry-details { + font-size: 8pt; + } + + .compact-list, + .skills-list { + padding-left: 15px; + } + + header { + box-shadow: none; + } + + .entry { + box-shadow: none; + padding: 10px 0; + } +} + +@media (max-width: 600px) { + .skills-container { + flex-direction: column; + } + + .skills-column { + width: 100%; + } +} \ No newline at end of file diff --git a/src/ai_hawk/libs/resume_and_cover_builder/resume_style/style_krishnavalliappan.css b/src/ai_hawk/libs/resume_and_cover_builder/resume_style/style_krishnavalliappan.css new file mode 100644 index 000000000..541be553d --- /dev/null +++ b/src/ai_hawk/libs/resume_and_cover_builder/resume_style/style_krishnavalliappan.css @@ -0,0 +1,136 @@ +/*Default$https://github.com/krishnavalliappan*/ +body { + font-family: "Barlow", Arial, sans-serif; + line-height: 1.2; + color: #333; + max-width: 700px; + margin: 0 auto; + padding: 10px; + font-size: 9pt; +} + +header { + text-align: center; + margin-bottom: 10px; +} + +h1 { + font-size: 24pt; + font-weight: 700; + margin: 0 0 5px 0; +} + +.contact-info { + display: flex; + justify-content: center; + flex-wrap: wrap; + gap: 10px; + font-size: 9pt; + font-weight: normal; +} + +.contact-info p { + margin: 0; +} + +.contact-info a { + color: #0077b5; + text-decoration: none; +} + +.fab, +.fas { + margin-right: 3px; +} + +span { + font-weight: normal; +} + +h2 { + font-size: 16pt; + font-weight: 600; + border-bottom: 1px solid #333; + padding-bottom: 2px; + margin: 10px 0 5px 0; + text-align: center; +} + +.entry { + margin-bottom: 8px; +} + +.entry-header { + display: flex; + justify-content: space-between; + font-weight: 600; +} + +.entry-details { + display: flex; + justify-content: space-between; + font-style: italic; + margin-bottom: 2px; + font-size: 8pt; +} + +.compact-list { + margin: 2px 0; + padding-left: 15px; +} + +.compact-list li { + margin-bottom: 2px; +} + +.two-column { + display: flex; + justify-content: space-between; +} + +.two-column ul { + width: 48%; + margin: 0; + padding-left: 15px; +} + +a { + color: #0077b5; + text-decoration: none; +} + +a:hover { + text-decoration: underline; +} + +@media print { + body { + padding: 0; + margin: 0; + font-size: 9pt; + } + + @page { + margin: 0.5cm; + } + + h1 { + font-size: 18pt; + } + + h2 { + font-size: 11pt; + } + + .contact-info { + font-size: 8pt; + } + + .entry-details { + font-size: 7pt; + } + + .compact-list { + padding-left: 12px; + } +} diff --git a/src/ai_hawk/libs/resume_and_cover_builder/resume_style/style_samodum_bold.css b/src/ai_hawk/libs/resume_and_cover_builder/resume_style/style_samodum_bold.css new file mode 100644 index 000000000..05a4d3194 --- /dev/null +++ b/src/ai_hawk/libs/resume_and_cover_builder/resume_style/style_samodum_bold.css @@ -0,0 +1,191 @@ +/*Clean Blue$https://github.com/samodum*/ +@import url("https://fonts.googleapis.com/css2?family=Josefin+Sans&family=Kaisei+HarunoUmi&family=Open+Sans:ital,wght@0,400;0,600;1,400&display=swap"); + +:root { + --pageWidth: 49.62rem; + --textColor: #383838; + --lineColorA: #b8b8b8; + --accentColor: blue; + --HFont: "Josefin Sans", sans-serif; + --PFont: "Open Sans", sans-serif; + --BText: "Kaisei HarunoUmi", serif; + --sectionSpacing: 1.5rem; + --bodyFontSize: 0.875rem; + --KeyColumn: 9.375rem; +} + +* { + margin: 0; + padding: 0; + box-sizing: border-box; + color: var(--textColor); + font-size: var(--bodyFontSize); +} + +body { + /* border: 1px solid var(--accentColor); page guidelines*/ + max-width: var(--pageWidth); + padding: 3.375rem 1.5rem; + display: flex; + font-family: var(--PFont); + flex-direction: column; + gap: 1.5rem; + margin: 0 auto; +} + +main { + display: flex; + flex-direction: column; + gap: 1.5rem; + order: 2; +} + +a { + text-decoration: none; +} + +a:hover { + color: var(--accentColor); + transition: color 0.3s ease; +} + +header { + order: 0; + display: flex; + flex-direction: column; + justify-content: space-between; + align-items: start; + gap: 1.5rem; +} + +h1 { + font-family: var(--HFont); + font-size: 1.5rem; + font-weight: 400; + margin-bottom: -0.125rem; + color: var(--accentColor); +} + +.contact-info { + display: flex; + flex-direction: column; + gap: 0.125rem; +} + +.contact-info p { + font-family: var(--PFont); +} + +.contact-info p::before { + margin-right: 0.25rem; + text-transform: capitalize; + font-family: var(--HFont); + font-weight: 600; +} + +.contact-info p:nth-child(1)::before { + content: "address:"; +} +.contact-info p:nth-child(2)::before { + content: "phone:"; +} +.contact-info p:nth-child(3)::before { + content: "email:"; +} +.contact-info p:nth-child(4)::before { + content: "linkedin:"; +} +.contact-info p:nth-child(5)::before { + content: "github:"; +} + +section h2 { + font-family: var(--HFont); + font-size: 1.125rem; + font-weight: bold; + color: var(--accentColor); + padding-bottom: 0.25rem; + margin-bottom: 0.5rem; + border-bottom: 1px solid var(--lineColorA); +} + +.entry { + padding-top: 1rem; + display: grid; + grid-template-columns: 1fr 4fr; + column-gap: 10px; +} + +.entry:first-of-type { + padding-top: 0.5rem; +} + +.entry-header { + grid-column: 1; + font-family: var(--HFont); + font-weight: 600; + display: flex; + flex-direction: column; + gap: 0.25rem; +} + +.entry-details, +#side-projects .compact-list { + margin-top: -4px; +} + +.entry-details, +.compact-list { + grid-column: 2; +} + +.entry-title { + font-family: var(--HFont); + font-weight: 600; + margin-right: 0.25rem; +} + +.entry-year { + font-style: italic; +} + +.compact-list { + padding-left: 10px; + list-style-type: circle; + margin: 0; +} + +.compact-list li { + margin-left: 5px; +} + +#achievements .compact-list { + padding-top: 0.25rem; +} + +.two-column { + padding-top: 0.25rem; + display: grid; + grid-template-columns: 1fr 1fr; + column-gap: 10px; +} + +.two-column .compact-list:first-child { + grid-column: 1; +} + +#work-experience { + order: 1; +} +#education { + order: 2; +} +#achievements { + order: 3; +} +#side-projects { + order: 4; +} +#skills-languages { + order: 5; +} diff --git a/src/ai_hawk/libs/resume_and_cover_builder/style_manager.py b/src/ai_hawk/libs/resume_and_cover_builder/style_manager.py new file mode 100644 index 000000000..54a317052 --- /dev/null +++ b/src/ai_hawk/libs/resume_and_cover_builder/style_manager.py @@ -0,0 +1,126 @@ +# src/ai_hawk/libs/resume_and_cover_builder/style_manager.py +import os +from pathlib import Path +from typing import Dict, List, Tuple, Optional +import inquirer +import webbrowser +import sys +import logging + +# Configura il logging +logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s') + +class StyleManager: + + def __init__(self): + self.styles_directory: Optional[Path] = None + self.selected_style: Optional[str] = None + current_file = Path(__file__).resolve() + # Salire di 4 livelli per raggiungere la radice del progetto + project_root = current_file.parent.parent.parent.parent # Adatta se la struttura cambia + + # Imposta la directory degli stili in modo robusto + self.styles_directory = project_root / "ai_hawk" / "libs" / "resume_and_cover_builder" / "resume_style" + + logging.debug(f"Project root determinato come: {project_root}") + logging.debug(f"Directory degli stili impostata su: {self.styles_directory}") + + def get_styles(self) -> Dict[str, Tuple[str, str]]: + """ + Ottiene gli stili disponibili nella directory degli stili. + Returns: + Dict[str, Tuple[str, str]]: Un dizionario che mappa i nomi degli stili ai loro file e link degli autori. + """ + styles_to_files = {} + if not self.styles_directory: + logging.warning("Directory degli stili non impostata.") + return styles_to_files + logging.debug(f"Leggendo la directory degli stili: {self.styles_directory}") + try: + files = [f for f in self.styles_directory.iterdir() if f.is_file()] + logging.debug(f"Files trovati: {[f.name for f in files]}") + for file_path in files: + logging.debug(f"Processando file: {file_path}") + with file_path.open('r', encoding='utf-8') as file: + first_line = file.readline().strip() + logging.debug(f"Prima linea del file {file_path.name}: {first_line}") + if first_line.startswith("/*") and first_line.endswith("*/"): + content = first_line[2:-2].strip() + if '$' in content: + style_name, author_link = content.split('$', 1) + style_name = style_name.strip() + author_link = author_link.strip() + styles_to_files[style_name] = (file_path.name, author_link) + logging.info(f"Aggiunto stile: {style_name} da {author_link}") + except FileNotFoundError: + logging.error(f"Directory {self.styles_directory} non trovata.") + except PermissionError: + logging.error(f"Permesso negato per accedere a {self.styles_directory}.") + except Exception as e: + logging.error(f"Errore imprevisto durante la lettura degli stili: {e}") + return styles_to_files + + def format_choices(self, styles_to_files: Dict[str, Tuple[str, str]]) -> List[str]: + """ + Format the style choices for the user. + Args: + styles_to_files (Dict[str, Tuple[str, str]]): A dictionary mapping style names to their file names and author links. + Returns: + List[str]: A list of formatted style choices. + """ + return [f"{style_name} (style author -> {author_link})" for style_name, (file_name, author_link) in styles_to_files.items()] + + def get_style_path(self) -> Path: + """ + Get the path to the selected style. + Args: + selected_style (str): The selected style. + Returns: + Path: a Path object representing the path to the selected style file. + """ + styles = self.get_styles() + if self.selected_style not in styles: + raise ValueError(f"Style '{self.selected_style}' not found.") + file_name, _ = styles[self.selected_style] + return self.styles_directory / file_name + + def choose_style(self) -> Optional[str]: + """ + Prompt the user to select a style using inquirer. + Returns: + Optional[str]: The name of the selected style, or None if selection was canceled. + """ + styles = self.get_styles() + if not styles: + logging.warning("Nessuno stile disponibile per la selezione.") + return None + + final_style_choice = "Crea il tuo stile di resume in CSS" + formatted_choices = self.format_choices(styles) + formatted_choices.append(final_style_choice) + + questions = [ + inquirer.List( + 'selected_style', + message="Quale stile vorresti adottare?", + choices=formatted_choices + ) + ] + + answers = inquirer.prompt(questions) + if answers and 'selected_style' in answers: + selected_display = answers['selected_style'] + if selected_display == final_style_choice: + tutorial_url = "https://github.com/feder-cr/lib_resume_builder_AIHawk/blob/main/how_to_contribute/web_designer.md" + logging.info("\nApro il tutorial nel tuo browser...") + webbrowser.open(tutorial_url) + sys.exit(0) + else: + # Estrai il nome dello stile dal formato "style_name (style author -> author_link)" + style_name = selected_display.split(' (')[0] + logging.info(f"Hai selezionato lo stile: {style_name}") + self.selected_style = style_name + return style_name + else: + logging.warning("Selezione annullata.") + return None diff --git a/src/ai_hawk/libs/resume_and_cover_builder/template_base.py b/src/ai_hawk/libs/resume_and_cover_builder/template_base.py new file mode 100644 index 000000000..6b379da1f --- /dev/null +++ b/src/ai_hawk/libs/resume_and_cover_builder/template_base.py @@ -0,0 +1,239 @@ +""" +This module is used to store the global configuration of the application. +""" +# app/libs/resume_and_cover_builder/template_base.py + + + +prompt_cover_letter_template = """ +- **Template to Use** +``` +
+
+
+

[Your Name]

+

[Your Address]

+

[City, State ZIP]

+

[Your Email]

+

[Your Phone Number]

+
+
+

[Company Name]

+
+
+
+

Dear [Recipient Team],

+

[Opening paragraph: Introduce yourself and state the position you are applying for.]

+

[Body paragraphs: Highlight your qualifications, experiences, and how they align with the job requirements.]

+

[Closing paragraph: Express your enthusiasm for the position and thank the recipient for their consideration.]

+

Sincerely,

+

[Your Name]

+

[Date]

+
+
+``` +The results should be provided in html format, Provide only the html code for the cover letter, without any explanations or additional text and also without ```html ``` +""" +prompt_header_template = """ +- **Template to Use** +``` +
+

[Name and Surname]

+
+

+ [Your City, Your Country] +

+

+ [Your Prefix Phone number] +

+

+ [Your Email] +

+

+ LinkedIn +

+

+ GitHub +

+
+
+``` +The results should be provided in html format, Provide only the html code for the resume, without any explanations or additional text and also without ```html ``` +""" + +prompt_education_template = """ +- **Template to Use** +``` +
+

Education

+
+
+ [University Name] + [Location] +
+
+ [Degree] in [Field of Study] | Grade: [Your Grade] + [Start Year] – [End Year] +
+
    +
  • [Course Name] → Grade: [Grade]
  • +
  • [Course Name] → Grade: [Grade]
  • +
  • [Course Name] → Grade: [Grade]
  • +
  • [Course Name] → Grade: [Grade]
  • +
  • [Course Name] → Grade: [Grade]
  • +
+
+
+``` +The results should be provided in html format, Provide only the html code for the resume, without any explanations or additional text and also without ```html ```""" + + +prompt_working_experience_template = """ +- **Template to Use** +``` +
+

Work Experience

+
+
+ [Company Name] + [Location] +
+
+ [Your Job Title] + [Start Date] – [End Date] +
+
    +
  • [Describe your responsibilities and achievements in this role]
  • +
  • [Describe any key projects or technologies you worked with]
  • +
  • [Mention any notable accomplishments or results]
  • +
+
+
+
+ [Company Name] + [Location] +
+
+ [Your Job Title] + [Start Date] – [End Date] +
+
    +
  • [Describe your responsibilities and achievements in this role]
  • +
  • [Describe any key projects or technologies you worked with]
  • +
  • [Mention any notable accomplishments or results]
  • +
+
+
+
+ [Company Name] + [Location] +
+
+ [Your Job Title] + [Start Date] – [End Date] +
+
    +
  • [Describe your responsibilities and achievements in this role]
  • +
  • [Describe any key projects or technologies you worked with]
  • +
  • [Mention any notable accomplishments or results]
  • +
+
+
+``` +The results should be provided in html format, Provide only the html code for the resume, without any explanations or additional text and also without ```html ```""" + + +prompt_projects_template = """ +- **Template to Use** +``` +
+

Side Projects

+
+
+ [Project Name] +
+
    +
  • [Describe any notable recognition or reception]
  • +
  • [Describe any notable recognition or reception]
  • +
+
+
+
+ [Project Name] +
+
    +
  • [Describe any notable recognition or reception]
  • +
  • [Describe any notable recognition or reception]
  • +
+
+
+
+ [Project Name] +
+
    +
  • [Describe any notable recognition or reception]
  • +
  • [Describe any notable recognition or reception]
  • +
+
+
+``` +The results should be provided in html format, Provide only the html code for the resume, without any explanations or additional text and also without ```html ``` +""" + + +prompt_achievements_template = """ +- **Template to Use** +``` +
+

Achievements

+ +
+``` +The results should be provided in html format, Provide only the html code for the resume, without any explanations or additional text and also without ```html ``` +""" + +prompt_certifications_template = """ +- **Template to Use** +``` +
+

Certifications

+ +
+``` +The results should be provided in html format, Provide only the html code for the resume, without any explanations or additional text and also without ```html ``` +""" + +prompt_additional_skills_template = """ +- **Template to Use** +''' +
+

Additional Skills

+
+
    +
  • [Specific Skill or Technology]
  • +
  • [Specific Skill or Technology]
  • +
  • [Specific Skill or Technology]
  • +
  • [Specific Skill or Technology]
  • +
  • [Specific Skill or Technology]
  • +
  • [Specific Skill or Technology]
  • +
+
    +
  • [Specific Skill or Technology]
  • +
  • [Specific Skill or Technology]
  • +
  • [Specific Skill or Technology]
  • +
  • [Specific Skill or Technology]
  • +
  • [Specific Skill or Technology]
  • +
  • Languages:
  • +
+
+
+''' +The results should be provided in html format, Provide only the html code for the resume, without any explanations or additional text and also without ```html ``` +""" diff --git a/src/ai_hawk/libs/resume_and_cover_builder/utils.py b/src/ai_hawk/libs/resume_and_cover_builder/utils.py new file mode 100644 index 000000000..9ae2a0e07 --- /dev/null +++ b/src/ai_hawk/libs/resume_and_cover_builder/utils.py @@ -0,0 +1,131 @@ +""" +This module contains utility functions for the Resume and Cover Letter Builder service. +""" + +# app/libs/resume_and_cover_builder/utils.py +import json +import openai +import time +from datetime import datetime +from typing import Dict, List +from langchain_core.messages.ai import AIMessage +from langchain_core.prompt_values import StringPromptValue +from langchain_openai import ChatOpenAI +from .config import global_config +from loguru import logger +from requests.exceptions import HTTPError as HTTPStatusError + + +class LLMLogger: + + def __init__(self, llm: ChatOpenAI): + self.llm = llm + + @staticmethod + def log_request(prompts, parsed_reply: Dict[str, Dict]): + calls_log = global_config.LOG_OUTPUT_FILE_PATH / "open_ai_calls.json" + if isinstance(prompts, StringPromptValue): + prompts = prompts.text + elif isinstance(prompts, Dict): + # Convert prompts to a dictionary if they are not in the expected format + prompts = { + f"prompt_{i+1}": prompt.content + for i, prompt in enumerate(prompts.messages) + } + else: + prompts = { + f"prompt_{i+1}": prompt.content + for i, prompt in enumerate(prompts.messages) + } + + current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + # Extract token usage details from the response + token_usage = parsed_reply["usage_metadata"] + output_tokens = token_usage["output_tokens"] + input_tokens = token_usage["input_tokens"] + total_tokens = token_usage["total_tokens"] + + # Extract model details from the response + model_name = parsed_reply["response_metadata"]["model_name"] + prompt_price_per_token = 0.00000015 + completion_price_per_token = 0.0000006 + + # Calculate the total cost of the API call + total_cost = (input_tokens * prompt_price_per_token) + ( + output_tokens * completion_price_per_token + ) + + # Create a log entry with all relevant information + log_entry = { + "model": model_name, + "time": current_time, + "prompts": prompts, + "replies": parsed_reply["content"], # Response content + "total_tokens": total_tokens, + "input_tokens": input_tokens, + "output_tokens": output_tokens, + "total_cost": total_cost, + } + + # Write the log entry to the log file in JSON format + with open(calls_log, "a", encoding="utf-8") as f: + json_string = json.dumps(log_entry, ensure_ascii=False, indent=4) + f.write(json_string + "\n") + + +class LoggerChatModel: + + def __init__(self, llm: ChatOpenAI): + self.llm = llm + + def __call__(self, messages: List[Dict[str, str]]) -> str: + max_retries = 15 + retry_delay = 10 + + for attempt in range(max_retries): + try: + reply = self.llm.invoke(messages) + parsed_reply = self.parse_llmresult(reply) + LLMLogger.log_request(prompts=messages, parsed_reply=parsed_reply) + return reply + except (openai.RateLimitError, HTTPStatusError) as err: + if isinstance(err, HTTPStatusError) and err.response.status_code == 429: + logger.warning(f"HTTP 429 Too Many Requests: Waiting for {retry_delay} seconds before retrying (Attempt {attempt + 1}/{max_retries})...") + time.sleep(retry_delay) + retry_delay *= 2 + else: + wait_time = self.parse_wait_time_from_error_message(str(err)) + logger.warning(f"Rate limit exceeded or API error. Waiting for {wait_time} seconds before retrying (Attempt {attempt + 1}/{max_retries})...") + time.sleep(wait_time) + except Exception as e: + logger.error(f"Unexpected error occurred: {str(e)}, retrying in {retry_delay} seconds... (Attempt {attempt + 1}/{max_retries})") + time.sleep(retry_delay) + retry_delay *= 2 + + logger.critical("Failed to get a response from the model after multiple attempts.") + raise Exception("Failed to get a response from the model after multiple attempts.") + + def parse_llmresult(self, llmresult: AIMessage) -> Dict[str, Dict]: + # Parse the LLM result into a structured format. + content = llmresult.content + response_metadata = llmresult.response_metadata + id_ = llmresult.id + usage_metadata = llmresult.usage_metadata + + parsed_result = { + "content": content, + "response_metadata": { + "model_name": response_metadata.get("model_name", ""), + "system_fingerprint": response_metadata.get("system_fingerprint", ""), + "finish_reason": response_metadata.get("finish_reason", ""), + "logprobs": response_metadata.get("logprobs", None), + }, + "id": id_, + "usage_metadata": { + "input_tokens": usage_metadata.get("input_tokens", 0), + "output_tokens": usage_metadata.get("output_tokens", 0), + "total_tokens": usage_metadata.get("total_tokens", 0), + }, + } + return parsed_result diff --git a/src/ai_hawk/llm/prompts.py b/src/ai_hawk/llm/prompts.py deleted file mode 100644 index cc7bc80a1..000000000 --- a/src/ai_hawk/llm/prompts.py +++ /dev/null @@ -1,547 +0,0 @@ -# Personal Information Template -personal_information_template = """ -Answer the following question based on the provided personal information. - -## Rules -- Answer questions directly. - -## Example -My resume: John Doe, born on 01/01/1990, living in Milan, Italy. -Question: What is your city? - Milan - -Personal Information: {resume_section} -Question: {question} -Do not output anything else in the response other than the answer. -""" - -# Self Identification Template -self_identification_template = """ -Answer the following question based on the provided self-identification details. - -## Rules -- Answer questions directly. - -## Example -My resume: Male, uses he/him pronouns, not a veteran, no disability. -Question: What are your gender? -Male - -Self-Identification: {resume_section} -Question: {question} -Do not output anything else in the response other than the answer. -""" - -# Legal Authorization Template -legal_authorization_template = """ -Answer the following question based on the provided legal authorization details. - -## Rules -- Answer questions directly. - -## Example -My resume: Authorized to work in the EU, no US visa required. -Question: Are you legally allowed to work in the EU? -Yes - -Legal Authorization: {resume_section} -Question: {question} -Do not output anything else in the response other than the answer. -""" - -# Work Preferences Template -work_preferences_template = """ -Answer the following question based on the provided work preferences. - -## Rules -- Answer questions directly. - -## Example -My resume: Open to remote work, willing to relocate. -Question: Are you open to remote work? -Yes - -Work Preferences: {resume_section} -Question: {question} -Do not output anything else in the response other than the answer. -""" - -# Education Details Template -education_details_template = """ -Answer the following question based on the provided education details. - -## Rules -- Answer questions directly. -- If it seems likely that you have the experience, even if not explicitly defined, answer as if you have the experience. -- If unsure, respond with "I have no experience with that, but I learn fast" or "Not yet, but willing to learn." -- Keep the answer under 140 characters. - -## Example -My resume: Bachelor's degree in Computer Science with experience in Python. -Question: Do you have experience with Python? -Yes, I have experience with Python. - -Education Details: {resume_section} -Question: {question} -Do not output anything else in the response other than the answer. -""" - -# Experience Details Template -experience_details_template = """ -Answer the following question based on the provided experience details. - -## Rules -- Answer questions directly. -- If it seems likely that you have the experience, even if not explicitly defined, answer as if you have the experience. -- If unsure, respond with "I have no experience with that, but I learn fast" or "Not yet, but willing to learn." -- Keep the answer under 140 characters. - -## Example -My resume: 3 years as a software developer with leadership experience. -Question: Do you have leadership experience? -Yes, I have 3 years of leadership experience. - -Experience Details: {resume_section} -Question: {question} -Do not output anything else in the response other than the answer. -""" - -# Projects Template -projects_template = """ -Answer the following question based on the provided project details. - -## Rules -- Answer questions directly. -- If it seems likely that you have the experience, even if not explicitly defined, answer as if you have the experience. -- Keep the answer under 140 characters. - -## Example -My resume: Led the development of a mobile app, repository available. -Question: Have you led any projects? -Yes, led the development of a mobile app - -Projects: {resume_section} -Question: {question} -Do not output anything else in the response other than the answer. -""" - -# Availability Template -availability_template = """ -Answer the following question based on the provided availability details. - -## Rules -- Answer questions directly. -- Keep the answer under 140 characters. -- Use periods only if the answer has multiple sentences. - -## Example -My resume: Available to start immediately. -Question: When can you start? -I can start immediately. - -Availability: {resume_section} -Question: {question} -Do not output anything else in the response other than the answer. -""" - -# Salary Expectations Template -salary_expectations_template = """ -Answer the following question based on the provided salary expectations. - -## Rules -- Answer questions directly. -- Keep the answer under 140 characters. -- Use periods only if the answer has multiple sentences. - -## Example -My resume: Looking for a salary in the range of 50k-60k USD. -Question: What are your salary expectations? -55000. - -Salary Expectations: {resume_section} -Question: {question} -Do not output anything else in the response other than the answer. -""" - -# Certifications Template -certifications_template = """ -Answer the following question based on the provided certifications. - -## Rules -- Answer questions directly. -- If it seems likely that you have the experience, even if not explicitly defined, answer as if you have the experience. -- If unsure, respond with "I have no experience with that, but I learn fast" or "Not yet, but willing to learn." -- Keep the answer under 140 characters. - -## Example -My resume: Certified in Project Management Professional (PMP). -Question: Do you have PMP certification? -Yes, I am PMP certified. - -Certifications: {resume_section} -Question: {question} -Do not output anything else in the response other than the answer. -""" - -# Languages Template -languages_template = """ -Answer the following question based on the provided language skills. - -## Rules -- Answer questions directly. -- If it seems likely that you have the experience, even if not explicitly defined, answer as if you have the experience. -- If unsure, respond with "I have no experience with that, but I learn fast" or "Not yet, but willing to learn." -- Keep the answer under 140 characters. Do not add any additional languages what is not in my experience - -## Example -My resume: Fluent in Italian and English. -Question: What languages do you speak? -Fluent in Italian and English. - -Languages: {resume_section} -Question: {question} -Do not output anything else in the response other than the answer. -""" - -# Interests Template -interests_template = """ -Answer the following question based on the provided interests. - -## Rules -- Answer questions directly. -- Keep the answer under 140 characters. -- Use periods only if the answer has multiple sentences. - -## Example -My resume: Interested in AI and data science. -Question: What are your interests? -AI and data science. - -Interests: {resume_section} -Question: {question} -Do not output anything else in the response other than the answer. -""" - -summarize_prompt_template = """ -As a seasoned HR expert, your task is to identify and outline the key skills and requirements necessary for the position of this job. Use the provided job description as input to extract all relevant information. This will involve conducting a thorough analysis of the job's responsibilities and the industry standards. You should consider both the technical and soft skills needed to excel in this role. Additionally, specify any educational qualifications, certifications, or experiences that are essential. Your analysis should also reflect on the evolving nature of this role, considering future trends and how they might affect the required competencies. - -Rules: -Remove boilerplate text -Include only relevant information to match the job description against the resume - -# Analysis Requirements -Your analysis should include the following sections: -Technical Skills: List all the specific technical skills required for the role based on the responsibilities described in the job description. -Soft Skills: Identify the necessary soft skills, such as communication abilities, problem-solving, time management, etc. -Educational Qualifications and Certifications: Specify the essential educational qualifications and certifications for the role. -Professional Experience: Describe the relevant work experiences that are required or preferred. -Role Evolution: Analyze how the role might evolve in the future, considering industry trends and how these might influence the required skills. - -# Final Result: -Your analysis should be structured in a clear and organized document with distinct sections for each of the points listed above. Each section should contain: -This comprehensive overview will serve as a guideline for the recruitment process, ensuring the identification of the most qualified candidates. - -# Job Description: -``` -{text} -``` - ---- - -# Job Description Summary""" - -coverletter_template = """ -Compose a brief and impactful cover letter based on the provided job description and resume. The letter should be no longer than three paragraphs and should be written in a professional, yet conversational tone. Avoid using any placeholders, and ensure that the letter flows naturally and is tailored to the job. - -Analyze the job description to identify key qualifications and requirements. Introduce the candidate succinctly, aligning their career objectives with the role. Highlight relevant skills and experiences from the resume that directly match the job’s demands, using specific examples to illustrate these qualifications. Reference notable aspects of the company, such as its mission or values, that resonate with the candidate’s professional goals. Conclude with a strong statement of why the candidate is a good fit for the position, expressing a desire to discuss further. - -Please write the cover letter in a way that directly addresses the job role and the company’s characteristics, ensuring it remains concise and engaging without unnecessary embellishments. The letter should be formatted into paragraphs and should not include a greeting or signature. - -## Rules: -- Provide only the text of the cover letter. -- Do not include any introductions, explanations, or additional information. -- The letter should be formatted into paragraph. - -## Company Name: -{company} - -## Job Description: -``` -{job_description} -``` -## My resume: -``` -{resume} -``` -""" - -numeric_question_template = """ -Read the following resume carefully and answer the specific questions regarding the candidate's experience with a number of years. Follow these strategic guidelines when responding: - -1. **Related and Inferred Experience:** - - **Similar Technologies:** If experience with a specific technology is not explicitly stated, but the candidate has experience with similar or related technologies, provide a plausible number of years reflecting this related experience. For instance, if the candidate has experience with Python and projects involving technologies similar to Java, estimate a reasonable number of years for Java. - - **Projects and Studies:** Examine the candidate’s projects and studies to infer skills not explicitly mentioned. Complex and advanced projects often indicate deeper expertise. - -2. **Indirect Experience and Academic Background:** - - **Type of University and Studies:** Consider the type of university and course followed. - - **Exam Grades:** Consider exam grades achieved. High grades in relevant subjects can indicate stronger proficiency and understanding. - - **Relevant thesis:** Consider the thesis of the candidate has worked. Advanced projects suggest deeper skills. - - **Roles and Responsibilities:** Evaluate the roles and responsibilities held to estimate experience with specific technologies or skills. - - -3. **Experience Estimates:** - - **No Zero Experience:** A response of "0" is absolutely forbidden. If direct experience cannot be confirmed, provide a minimum of "2" years based on inferred or related experience. - - **For Low Experience (up to 5 years):** Estimate experience based on inferred bacherol, skills and projects, always providing at least "2" years when relevant. - - **For High Experience:** For high levels of experience, provide a number based on clear evidence from the resume. Avoid making inferences for high experience levels unless the evidence is strong. - -4. **Rules:** - - Answer the question directly with a number, avoiding "0" entirely. - -## Example 1 -``` -## Curriculum - -I had a degree in computer science. I have worked years with MQTT protocol. - -## Question - -How many years of experience do you have with IoT? - -## Answer - -4 -``` -## Example 1 -``` -## Curriculum - -I had a degree in computer science. - -## Question - -How many years of experience do you have with Bash? - -## Answer - -2 -``` - -## Example 2 -``` -## Curriculum - -I am a software engineer with 5 years of experience in Swift and Python. I have worked on an AI project. - -## Question - -How many years of experience do you have with AI? - -## Answer - -2 -``` - -## Resume: -``` -{resume_educations} -{resume_jobs} -{resume_projects} -``` - -## Question: -{question} - ---- - -When responding, consider all available information, including projects, work experience, and academic background, to provide an accurate and well-reasoned answer. Make every effort to infer relevant experience and avoid defaulting to 0 if any related experience can be estimated. -Do not output anything else in the response other than the answer. -""" - -options_template = """The following is a resume and an answered question about the resume, the answer is one of the options. - -## Rules -- Never choose the default/placeholder option, examples are: 'Select an option', 'None', 'Choose from the options below', etc. -- The answer must be one of the options. -- The answer must exclusively contain one of the options. - -## Example -My resume: I'm a software engineer with 10 years of experience on swift, python, C, C++. -Question: How many years of experience do you have on python? -Options: [1-2, 3-5, 6-10, 10+] -10+ - ------ - -## My resume: -``` -{resume} -{job_application_profile} -``` - -## Question: -{question} - -## Options: -{options} ------ -Do not output anything else in the response other than the answer. -## """ - -try_to_fix_template = """\ -The objective is to fix the text of a form input on a web page. - -## Rules -- Use the error to fix the original text. -- The error "Please enter a valid answer" usually means the text is too large, shorten the reply to less than a tweet. -- For errors like "Enter a whole number between 3 and 30", just need a number. - ------ - -## Form Question -{question} - -## Input -{input} - -## Error -{error} - -## Fixed Input -""" - -func_summarize_prompt_template = """ - Following are two texts, one with placeholders and one without, the second text uses information from the first text to fill the placeholders. - - ## Rules - - A placeholder is a string like "[[placeholder]]". E.g. "[[company]]", "[[job_title]]", "[[years_of_experience]]"... - - The task is to remove the placeholders from the text. - - If there is no information to fill a placeholder, remove the placeholder, and adapt the text accordingly. - - No placeholders should remain in the text. - - ## Example - Text with placeholders: "I'm a software engineer engineer with 10 years of experience on [placeholder] and [placeholder]." - Text without placeholders: "I'm a software engineer with 10 years of experience." - - ----- - - ## Text with placeholders: - {text_with_placeholders} - - ## Text without placeholders:""" - -is_relavant_position_template = """ - Evaluate whether the provided resume meets the requirements outlined in the job description. Determine if the candidate is suitable for the job based on the information provided. - -Job Description: {job_description} - -Resume: {resume} - -Instructions: -1. Extract the key requirements from the job description, identifying hard requirements (must-haves) and soft requirements (nice-to-haves). -2. Identify the relevant qualifications from the resume. -3. Compare the qualifications against the requirements, ensuring all hard requirements are met. Allow for a 1-year experience gap if applicable, as experience is usually a hard requirement. -4. Provide a suitability score from 1 to 10. where 1 indicates the candidate does not meet any requirements and 10 indicates the candidate meets all requirements. -5. Provide a brief reasoning for the score, highlighting which requirements are met and which are not. - -Output Format (Strictly follow this format): -Score: [numerical score] -Reasoning: [brief explanation] -Do not output anything else in the response other than the score and reasoning. -""" - -resume_or_cover_letter_template = """ -Given the following phrase, respond with only 'resume' if the phrase is about a resume, or 'cover' if it's about a cover letter. -If the phrase contains only one word 'upload', consider it as 'cover'. -If the phrase contains 'upload resume', consider it as 'resume'. -Do not provide any additional information or explanations. - -phrase: {phrase} -""" - -determine_section_template = """You are assisting a bot designed to automatically apply for jobs on AIHawk. The bot receives various questions about job applications and needs to determine the most relevant section of the resume to provide an accurate response. - -For the following question: '{question}', determine which section of the resume is most relevant. -Respond with exactly one of the following options: -- Personal information -- Self Identification -- Legal Authorization -- Work Preferences -- Education Details -- Experience Details -- Projects -- Availability -- Salary Expectations -- Certifications -- Languages -- Interests -- Cover letter - -Here are detailed guidelines to help you choose the correct section: - -1. **Personal Information**: -- **Purpose**: Contains your basic contact details and online profiles. -- **Use When**: The question is about how to contact you or requests links to your professional online presence. -- **Examples**: Email address, phone number, AIHawk profile, GitHub repository, personal website. - -2. **Self Identification**: -- **Purpose**: Covers personal identifiers and demographic information. -- **Use When**: The question pertains to your gender, pronouns, veteran status, disability status, or ethnicity. -- **Examples**: Gender, pronouns, veteran status, disability status, ethnicity. - -3. **Legal Authorization**: -- **Purpose**: Details your work authorization status and visa requirements. -- **Use When**: The question asks about your ability to work in specific countries or if you need sponsorship or visas. -- **Examples**: Work authorization in EU and US, visa requirements, legally allowed to work. - -4. **Work Preferences**: -- **Purpose**: Specifies your preferences regarding work conditions and job roles. -- **Use When**: The question is about your preferences for remote work, in-person work, relocation, and willingness to undergo assessments or background checks. -- **Examples**: Remote work, in-person work, open to relocation, willingness to complete assessments. - -5. **Education Details**: -- **Purpose**: Contains information about your academic qualifications. -- **Use When**: The question concerns your degrees, universities attended, GPA, and relevant coursework. -- **Examples**: Degree, university, GPA, field of study, exams. - -6. **Experience Details**: -- **Purpose**: Details your professional work history and key responsibilities. -- **Use When**: The question pertains to your job roles, responsibilities, and achievements in previous positions. -- **Examples**: Job positions, company names, key responsibilities, skills acquired. - -7. **Projects**: -- **Purpose**: Highlights specific projects you have worked on. -- **Use When**: The question asks about particular projects, their descriptions, or links to project repositories. -- **Examples**: Project names, descriptions, links to project repositories. - -8. **Availability**: -- **Purpose**: Provides information on your availability for new roles. -- **Use When**: The question is about how soon you can start a new job or your notice period. -- **Examples**: Notice period, availability to start. - -9. **Salary Expectations**: -- **Purpose**: Covers your expected salary range. -- **Use When**: The question pertains to your salary expectations or compensation requirements. -- **Examples**: Desired salary range. - -10. **Certifications**: - - **Purpose**: Lists your professional certifications or licenses. - - **Use When**: The question involves your certifications or qualifications from recognized organizations. - - **Examples**: Certification names, issuing bodies, dates of validity. - -11. **Languages**: - - **Purpose**: Describes the languages you can speak and your proficiency levels. - - **Use When**: The question asks about your language skills or proficiency in specific languages. - - **Examples**: Languages spoken, proficiency levels. - -12. **Interests**: - - **Purpose**: Details your personal or professional interests. - - **Use When**: The question is about your hobbies, interests, or activities outside of work. - - **Examples**: Personal hobbies, professional interests. - -13. **Cover Letter**: - - **Purpose**: Contains your personalized cover letter or statement. - - **Use When**: The question involves your cover letter or specific written content intended for the job application. - - **Examples**: Cover letter content, personalized statements. - -Provide only the exact name of the section from the list above with no additional text. -""" \ No newline at end of file diff --git a/src/job_application_profile.py b/src/ai_hawk/resume_schemas/job_application_profile.py similarity index 100% rename from src/job_application_profile.py rename to src/ai_hawk/resume_schemas/job_application_profile.py diff --git a/src/ai_hawk/resume_schemas/resume.py b/src/ai_hawk/resume_schemas/resume.py new file mode 100644 index 000000000..ee70e74a7 --- /dev/null +++ b/src/ai_hawk/resume_schemas/resume.py @@ -0,0 +1,197 @@ +from dataclasses import dataclass, field +from typing import List, Dict, Any, Optional, Union +import yaml +from pydantic import BaseModel, EmailStr, HttpUrl, Field + + + +class PersonalInformation(BaseModel): + name: Optional[str] + surname: Optional[str] + date_of_birth: Optional[str] + country: Optional[str] + city: Optional[str] + address: Optional[str] + zip_code: Optional[str] = Field(None, min_length=5, max_length=10) + phone_prefix: Optional[str] + phone: Optional[str] + email: Optional[EmailStr] + github: Optional[HttpUrl] = None + linkedin: Optional[HttpUrl] = None + + +class EducationDetails(BaseModel): + education_level: Optional[str] + institution: Optional[str] + field_of_study: Optional[str] + final_evaluation_grade: Optional[str] + start_date: Optional[str] + year_of_completion: Optional[int] + exam: Optional[Union[List[Dict[str, str]], Dict[str, str]]] = None + + +class ExperienceDetails(BaseModel): + position: Optional[str] + company: Optional[str] + employment_period: Optional[str] + location: Optional[str] + industry: Optional[str] + key_responsibilities: Optional[List[Dict[str, str]]] = None + skills_acquired: Optional[List[str]] = None + + +class Project(BaseModel): + name: Optional[str] + description: Optional[str] + link: Optional[HttpUrl] = None + + +class Achievement(BaseModel): + name: Optional[str] + description: Optional[str] + + +class Certifications(BaseModel): + name: Optional[str] + description: Optional[str] + + +class Language(BaseModel): + language: Optional[str] + proficiency: Optional[str] + + +class Availability(BaseModel): + notice_period: Optional[str] + + +class SalaryExpectations(BaseModel): + salary_range_usd: Optional[str] + + +class SelfIdentification(BaseModel): + gender: Optional[str] + pronouns: Optional[str] + veteran: Optional[str] + disability: Optional[str] + ethnicity: Optional[str] + + +class LegalAuthorization(BaseModel): + eu_work_authorization: Optional[str] + us_work_authorization: Optional[str] + requires_us_visa: Optional[str] + requires_us_sponsorship: Optional[str] + requires_eu_visa: Optional[str] + legally_allowed_to_work_in_eu: Optional[str] + legally_allowed_to_work_in_us: Optional[str] + requires_eu_sponsorship: Optional[str] + + +class Resume(BaseModel): + personal_information: Optional[PersonalInformation] + education_details: Optional[List[EducationDetails]] = None + experience_details: Optional[List[ExperienceDetails]] = None + projects: Optional[List[Project]] = None + achievements: Optional[List[Achievement]] = None + certifications: Optional[List[Certifications]] = None + languages: Optional[List[Language]] = None + interests: Optional[List[str]] = None + + @staticmethod + def normalize_exam_format(exam): + if isinstance(exam, dict): + return [{k: v} for k, v in exam.items()] + return exam + + def __init__(self, yaml_str: str): + try: + # Parse the YAML string + data = yaml.safe_load(yaml_str) + + if 'education_details' in data: + for ed in data['education_details']: + if 'exam' in ed: + ed['exam'] = self.normalize_exam_format(ed['exam']) + + # Create an instance of Resume from the parsed data + super().__init__(**data) + except yaml.YAMLError as e: + raise ValueError("Error parsing YAML file.") from e + except Exception as e: + raise Exception(f"Unexpected error while parsing YAML: {e}") from e + + + def _process_personal_information(self, data: Dict[str, Any]) -> PersonalInformation: + try: + return PersonalInformation(**data) + except TypeError as e: + raise TypeError(f"Invalid data for PersonalInformation: {e}") from e + except AttributeError as e: + raise AttributeError(f"AttributeError in PersonalInformation: {e}") from e + except Exception as e: + raise Exception(f"Unexpected error in PersonalInformation processing: {e}") from e + + def _process_education_details(self, data: List[Dict[str, Any]]) -> List[EducationDetails]: + education_list = [] + for edu in data: + try: + exams = [Exam(name=k, grade=v) for k, v in edu.get('exam', {}).items()] + education = EducationDetails( + education_level=edu.get('education_level'), + institution=edu.get('institution'), + field_of_study=edu.get('field_of_study'), + final_evaluation_grade=edu.get('final_evaluation_grade'), + start_date=edu.get('start_date'), + year_of_completion=edu.get('year_of_completion'), + exam=exams + ) + education_list.append(education) + except KeyError as e: + raise KeyError(f"Missing field in education details: {e}") from e + except TypeError as e: + raise TypeError(f"Invalid data for Education: {e}") from e + except AttributeError as e: + raise AttributeError(f"AttributeError in Education: {e}") from e + except Exception as e: + raise Exception(f"Unexpected error in Education processing: {e}") from e + return education_list + + def _process_experience_details(self, data: List[Dict[str, Any]]) -> List[ExperienceDetails]: + experience_list = [] + for exp in data: + try: + key_responsibilities = [ + Responsibility(description=list(resp.values())[0]) + for resp in exp.get('key_responsibilities', []) + ] + skills_acquired = [str(skill) for skill in exp.get('skills_acquired', [])] + experience = ExperienceDetails( + position=exp['position'], + company=exp['company'], + employment_period=exp['employment_period'], + location=exp['location'], + industry=exp['industry'], + key_responsibilities=key_responsibilities, + skills_acquired=skills_acquired + ) + experience_list.append(experience) + except KeyError as e: + raise KeyError(f"Missing field in experience details: {e}") from e + except TypeError as e: + raise TypeError(f"Invalid data for Experience: {e}") from e + except AttributeError as e: + raise AttributeError(f"AttributeError in Experience: {e}") from e + except Exception as e: + raise Exception(f"Unexpected error in Experience processing: {e}") from e + return experience_list + + +@dataclass +class Exam: + name: str + grade: str + +@dataclass +class Responsibility: + description: str \ No newline at end of file diff --git a/src/job_portals/application_form_elements.py b/src/job_portals/application_form_elements.py deleted file mode 100644 index c01cc2ee9..000000000 --- a/src/job_portals/application_form_elements.py +++ /dev/null @@ -1,24 +0,0 @@ -from enum import Enum - -from attr import dataclass - - -class TextBoxQuestionType(Enum): - NUMERIC = "numeric" - TEXTBOX = "textbox" - -class SelectQuestionType(Enum): - SINGLE_SELECT = "single_select" - MULTI_SELECT = "multi_select" - -@dataclass -class SelectQuestion: - question: str - options: list[str] - type: SelectQuestionType - - -@dataclass -class TextBoxQuestion: - question: str - type: TextBoxQuestionType diff --git a/src/job_portals/base_job_portal.py b/src/job_portals/base_job_portal.py deleted file mode 100644 index 502e3a867..000000000 --- a/src/job_portals/base_job_portal.py +++ /dev/null @@ -1,222 +0,0 @@ -from abc import ABC, abstractmethod -from re import A - -from constants import LINKEDIN -from src.job_portals.application_form_elements import SelectQuestion, TextBoxQuestion -from src.ai_hawk.authenticator import AIHawkAuthenticator -from src.job import Job -from src.jobContext import JobContext - -from selenium.webdriver.remote.webelement import WebElement -from typing import List - - -class WebPage(ABC): - - def __init__(self, driver): - self.driver = driver - - -class BaseJobsPage(WebPage): - - def __init__(self, driver, parameters): - super().__init__(driver) - self.parameters = parameters - - @abstractmethod - def next_job_page(self, position, location, page_number): - pass - - @abstractmethod - def job_tile_to_job(self, job_tile: WebElement) -> Job: - pass - - @abstractmethod - def get_jobs_from_page(self, scroll=False) -> List[WebElement]: - pass - - -class BaseJobPage(WebPage): - - def __init__(self, driver): - super().__init__(driver) - - @abstractmethod - def goto_job_page(self, job: Job): - pass - - @abstractmethod - def get_apply_button(self, job_context: JobContext) -> WebElement: - pass - - @abstractmethod - def get_job_description(self, job: Job) -> str: - pass - - @abstractmethod - def get_recruiter_link(self) -> str: - pass - - @abstractmethod - def click_apply_button(self, job_context: JobContext) -> None: - pass - - -class BaseApplicationPage(WebPage): - - def __init__(self, driver): - super().__init__(driver) - - @abstractmethod - def has_next_button(self) -> bool: - pass - - @abstractmethod - def click_next_button(self) -> None: - pass - - @abstractmethod - def has_submit_button(self) -> bool: - pass - - @abstractmethod - def click_submit_button(self) -> None: - pass - - @abstractmethod - def has_errors(self) -> None: - pass - - @abstractmethod - def handle_errors(self) -> None: - """this methos is also called as fix errors""" - pass - - @abstractmethod - def check_for_errors(self) -> None: - """As the current impl needs this, later when we add retry mechanism, we will be moving to has errors and handle errors""" - pass - - @abstractmethod - def get_input_elements(self) -> List[WebElement]: - """this method will update to Enum / other easy way (in future) instead of webList""" - pass - - @abstractmethod - def is_upload_field(self, element: WebElement) -> bool: - pass - - @abstractmethod - def get_file_upload_elements(self) -> List[WebElement]: - pass - - @abstractmethod - def get_upload_element_heading(self, element: WebElement) -> str: - pass - - @abstractmethod - def upload_file(self, element: WebElement, file_path: str) -> None: - pass - - @abstractmethod - def get_form_sections(self) -> List[WebElement]: - pass - - @abstractmethod - def is_terms_of_service(self, section: WebElement) -> bool: - pass - - @abstractmethod - def accept_terms_of_service(self, section: WebElement) -> None: - pass - - @abstractmethod - def is_radio_question(self, section: WebElement) -> bool: - pass - - @abstractmethod - def web_element_to_radio_question(self, section: WebElement) -> SelectQuestion: - pass - - @abstractmethod - def select_radio_option( - self, radio_question_web_element: WebElement, answer: str - ) -> None: - pass - - @abstractmethod - def is_textbox_question(self, section: WebElement) -> bool: - pass - - @abstractmethod - def web_element_to_textbox_question(self, section: WebElement) -> TextBoxQuestion: - pass - - @abstractmethod - def fill_textbox_question(self, section: WebElement, answer: str) -> None: - pass - - @abstractmethod - def is_dropdown_question(self, section: WebElement) -> bool: - pass - - @abstractmethod - def web_element_to_dropdown_question(self, section: WebElement) -> SelectQuestion: - pass - - @abstractmethod - def select_dropdown_option(self, section: WebElement, answer: str) -> None: - pass - - @abstractmethod - def discard(self) -> None: - pass - - @abstractmethod - def save(self) -> None: - """ this can be also be considered as save draft / save progress """ - pass - - -class BaseJobPortal(ABC): - - def __init__(self, driver): - self.driver = driver - - @property - @abstractmethod - def jobs_page(self) -> BaseJobsPage: - pass - - @property - @abstractmethod - def job_page(self) -> BaseJobPage: - pass - - @property - @abstractmethod - def authenticator(self) -> AIHawkAuthenticator: - pass - - @property - @abstractmethod - def application_page(self) -> BaseApplicationPage: - pass - - -def get_job_portal(portal_name, driver, parameters): - from src.job_portals.linkedIn.linkedin import LinkedIn - - if portal_name == LINKEDIN: - return LinkedIn(driver, parameters) - else: - raise ValueError(f"Unknown job portal: {portal_name}") - - -def get_authenticator(driver, platform): - from src.job_portals.linkedIn.authenticator import LinkedInAuthenticator - - if platform == LINKEDIN: - return LinkedInAuthenticator(driver) - else: - raise NotImplementedError(f"Platform {platform} not implemented yet.") diff --git a/src/job_portals/linkedIn/README b/src/job_portals/linkedIn/README deleted file mode 100644 index 48a61b607..000000000 --- a/src/job_portals/linkedIn/README +++ /dev/null @@ -1,4 +0,0 @@ -# LinkedIn Job Portal - -**Note:** This LinkedIn job portal is no longer maintained. It is kept for copyright and educational purposes, as well as for demonstration purposes. This represents past work that this project was doing earlier. It is preserved as a record of past work or as a memory. - diff --git a/src/job_portals/linkedIn/authenticator.py b/src/job_portals/linkedIn/authenticator.py deleted file mode 100644 index e98635996..000000000 --- a/src/job_portals/linkedIn/authenticator.py +++ /dev/null @@ -1,39 +0,0 @@ -from src.ai_hawk.authenticator import AIHawkAuthenticator -from src.logging import logger - -from selenium.common.exceptions import TimeoutException -from selenium.webdriver.support import expected_conditions as EC -from selenium.webdriver.support.wait import WebDriverWait - - -class LinkedInAuthenticator(AIHawkAuthenticator): - - @property - def home_url(self): - return "https://www.linkedin.com" - - def navigate_to_login(self): - return self.driver.get("https://www.linkedin.com/login") - - def handle_security_checks(self): - try: - logger.debug("Handling security check...") - WebDriverWait(self.driver, 10).until( - EC.url_contains('https://www.linkedin.com/checkpoint/challengesV2/') - ) - logger.warning("Security checkpoint detected. Please complete the challenge.") - WebDriverWait(self.driver, 300).until( - EC.url_contains('https://www.linkedin.com/feed/') - ) - logger.info("Security check completed") - except TimeoutException: - logger.error("Security check not completed. Please try again later.") - - @property - def is_logged_in(self): - keywords = ['feed', 'mynetwork','jobs','messaging','notifications'] - return any(item in self.driver.current_url for item in keywords) and 'linkedin.com' in self.driver.current_url - - def __init__(self, driver): - super().__init__(driver) - pass \ No newline at end of file diff --git a/src/job_portals/linkedIn/easy_application_page.py b/src/job_portals/linkedIn/easy_application_page.py deleted file mode 100644 index e4e02823f..000000000 --- a/src/job_portals/linkedIn/easy_application_page.py +++ /dev/null @@ -1,384 +0,0 @@ -import time -import traceback -from typing import List -from xml.dom.minidom import Element -from loguru import logger -from selenium.webdriver.remote.webelement import WebElement -from tenacity import retry -from job_portals.application_form_elements import ( - SelectQuestion, - SelectQuestionType, - TextBoxQuestion, - TextBoxQuestionType, -) -from job_portals.base_job_portal import BaseApplicationPage -from selenium.webdriver.common.by import By -from selenium.webdriver.support.ui import WebDriverWait -from selenium.webdriver.support import expected_conditions as EC -from selenium.webdriver.support.ui import Select -from selenium.webdriver.common.keys import Keys -from selenium.common.exceptions import NoSuchElementException - -import utils -from utils import time_utils - - -class LinkedInEasyApplicationPage(BaseApplicationPage): - - def __init__(self, driver): - super().__init__(driver) - - def has_next_button(self) -> bool: - logger.debug("Checking for next button") - button = self.driver.find_element(By.CLASS_NAME, "artdeco-button--primary") - return "next" in button.text.lower() - - def click_next_button(self) -> None: - logger.debug("Clicking next button") - button = self.driver.find_element(By.CLASS_NAME, "artdeco-button--primary") - if "next" not in button.text.lower(): - raise Exception("Next button not found") - time_utils.short_sleep() - button.click() - time_utils.medium_sleep() - - def is_upload_field(self, element: WebElement) -> bool: - is_upload = bool(element.find_elements(By.XPATH, ".//input[@type='file']")) - logger.debug(f"Element is upload field: {is_upload}") - return is_upload - - def get_input_elements(self) -> List[WebElement]: - try: - easy_apply_content = WebDriverWait(self.driver, 10).until( - EC.presence_of_element_located( - (By.CLASS_NAME, "jobs-easy-apply-content") - ) - ) - - input_elements = easy_apply_content.find_elements( - By.CLASS_NAME, "jobs-easy-apply-form-section__grouping" - ) - return input_elements - except Exception as e: - logger.error(f"Failed to find form elements: {e} {traceback.format_exc()}") - raise e - - def check_for_errors(self) -> None: - """ - as the current impl needs this, later when we add retry mechanism, we will be moving to has errors and handle errors - """ - logger.debug("Checking for form errors") - error_elements = self.driver.find_elements( - By.CLASS_NAME, "artdeco-inline-feedback--error" - ) - if error_elements: - logger.error(f"Form submission failed with errors: {error_elements}") - raise Exception( - f"Failed answering or file upload. {str([e.text for e in error_elements])}" - ) - - def has_errors(self) -> bool: - logger.debug("Checking for form errors") - error_elements = self.driver.find_elements( - By.CLASS_NAME, "artdeco-inline-feedback--error" - ) - return len(error_elements) > 0 - - def handle_errors(self) -> None: - logger.debug("Checking for form errors") - error_elements = self.driver.find_elements( - By.CLASS_NAME, "artdeco-inline-feedback--error" - ) - if error_elements: - logger.error(f"Form submission failed with errors: {error_elements}") - raise Exception( - f"Failed answering or file upload. {str([e.text for e in error_elements])}" - ) - - def has_submit_button(self) -> bool: - logger.debug("Checking for submit button") - button = self.driver.find_element(By.CLASS_NAME, "artdeco-button--primary") - return "submit application" in button.text.lower() - - def click_submit_button(self) -> None: - button = self.driver.find_element(By.CLASS_NAME, "artdeco-button--primary") - if "submit application" not in button.text.lower(): - raise Exception("Submit button not found") - logger.debug("Submit button found, submitting application") - self._unfollow_company() - time_utils.short_sleep() - button.click() - logger.info("Application submitted") - time_utils.short_sleep() - - def _unfollow_company(self) -> None: - try: - logger.debug("Unfollowing company") - follow_checkbox = self.driver.find_element( - By.XPATH, "//label[contains(.,'to stay up to date with their page.')]" - ) - follow_checkbox.click() - except Exception as e: - logger.debug(f"Failed to unfollow company: {e}") - - def get_file_upload_elements(self) -> List[WebElement]: - try: - show_more_button = self.driver.find_element( - By.XPATH, "//button[contains(@aria-label, 'Show more resumes')]" - ) - show_more_button.click() - logger.debug("Clicked 'Show more resumes' button") - except NoSuchElementException: - logger.debug("'Show more resumes' button not found, continuing...") - - file_upload_elements = self.driver.find_elements( - By.XPATH, "//input[@type='file']" - ) - return file_upload_elements - - def get_upload_element_heading(self, element: WebElement) -> str: - parent = element.find_element(By.XPATH, "..") - return parent.text.lower() - - def upload_file(self, element: WebElement, file_path: str) -> None: - logger.debug(f"Uploading file: {file_path}") - self.driver.execute_script("arguments[0].classList.remove('hidden')", element) - element.send_keys(file_path) - logger.debug("File uploaded") - time_utils.short_sleep() - - def get_form_sections(self) -> List[WebElement]: - form_sections = self.driver.find_elements( - By.CLASS_NAME, "jobs-easy-apply-form-section__grouping" - ) - return form_sections - - def accept_terms_of_service(self, section: WebElement) -> None: - element = section - checkbox = element.find_elements(By.TAG_NAME, "label") - if checkbox and any( - term in checkbox[0].text.lower() - for term in ["terms of service", "privacy policy", "terms of use"] - ): - checkbox[0].click() - logger.debug("Clicked terms of service checkbox") - - def is_terms_of_service(self, section: WebElement) -> bool: - element = section - checkbox = element.find_elements(By.TAG_NAME, "label") - return bool(checkbox) and any( - term in checkbox[0].text.lower() - for term in ["terms of service", "privacy policy", "terms of use"] - ) - - def is_radio_question(self, section: WebElement) -> bool: - question = section.find_element(By.CLASS_NAME, "jobs-easy-apply-form-element") - radios = question.find_elements(By.CLASS_NAME, "fb-text-selectable__option") - return bool(radios) - - def web_element_to_radio_question(self, section: WebElement) -> SelectQuestion: - question = section.find_element(By.CLASS_NAME, "jobs-easy-apply-form-element") - radios = question.find_elements(By.CLASS_NAME, "fb-text-selectable__option") - question_text = section.text.lower() - options = [radio.text.lower() for radio in radios] - return SelectQuestion( - question=question_text, - options=options, - type=SelectQuestionType.SINGLE_SELECT, - ) - - def select_radio_option(self, section: WebElement, answer: str) -> None: - question = section.find_element(By.CLASS_NAME, "jobs-easy-apply-form-element") - radios = question.find_elements(By.CLASS_NAME, "fb-text-selectable__option") - logger.debug(f"Selecting radio option: {answer}") - for radio in radios: - if answer in radio.text.lower(): - radio.find_element(By.TAG_NAME, "label").click() - return - radios[-1].find_element(By.TAG_NAME, "label").click() - - def is_textbox_question(self, section: WebElement) -> bool: - logger.debug("Searching for text fields in the section.") - text_fields = section.find_elements( - By.TAG_NAME, "input" - ) + section.find_elements(By.TAG_NAME, "textarea") - return bool(text_fields) - - def web_element_to_textbox_question(self, section: WebElement) -> TextBoxQuestion: - logger.debug("Searching for text fields in the section.") - text_fields = section.find_elements( - By.TAG_NAME, "input" - ) + section.find_elements(By.TAG_NAME, "textarea") - - text_field = text_fields[0] - question_text = section.find_element(By.TAG_NAME, "label").text.lower().strip() - logger.debug(f"Found text field with label: {question_text}") - - is_numeric = self._is_numeric_field(text_field) - - question_type = ( - TextBoxQuestionType.NUMERIC if is_numeric else TextBoxQuestionType.TEXTBOX - ) - return TextBoxQuestion(question=question_text, type=question_type) - - def fill_textbox_question(self, section: WebElement, answer: str) -> None: - logger.debug("Searching for text fields in the section.") - text_fields = section.find_elements( - By.TAG_NAME, "input" - ) + section.find_elements(By.TAG_NAME, "textarea") - - text_field = text_fields[0] - question_text = section.find_element(By.TAG_NAME, "label").text.lower().strip() - logger.debug(f"Found text field with label: {question_text}") - - self._enter_text(text_field, answer) - - time.sleep(1) - text_field.send_keys(Keys.ARROW_DOWN) - text_field.send_keys(Keys.ENTER) - logger.debug("Selected first option from the dropdown.") - - def _enter_text(self, element: WebElement, text: str) -> None: - logger.debug(f"Entering text: {text}") - element.clear() - element.send_keys(text) - - def _is_numeric_field(self, field: WebElement) -> bool: - field_type = field.get_attribute("type").lower() - field_id = field.get_attribute("id").lower() - is_numeric = ( - "numeric" in field_id - or field_type == "number" - or ("text" == field_type and "numeric" in field_id) - ) - logger.debug( - f"Field type: {field_type}, Field ID: {field_id}, Is numeric: {is_numeric}" - ) - return is_numeric - - def is_date_question(self, section: WebElement) -> bool: - date_fields = section.find_elements(By.CLASS_NAME, "artdeco-datepicker__input ") - return bool(date_fields) - - def is_dropdown_question(self, section: WebElement) -> bool: - try: - question = section.find_element( - By.CLASS_NAME, "jobs-easy-apply-form-element" - ) - - dropdowns = question.find_elements(By.TAG_NAME, "select") - if not dropdowns: - dropdowns = section.find_elements( - By.CSS_SELECTOR, "[data-test-text-entity-list-form-select]" - ) - - return bool(dropdowns) - except NoSuchElementException as e: - logger.error( - f"Failed to find dropdown question: {e} {traceback.format_exc()}" - ) - return False - - def web_element_to_dropdown_question(self, section: WebElement) -> SelectQuestion: - try: - question = section.find_element( - By.CLASS_NAME, "jobs-easy-apply-form-element" - ) - - dropdowns = question.find_elements(By.TAG_NAME, "select") - - if not dropdowns: - dropdowns = section.find_elements( - By.CSS_SELECTOR, "[data-test-text-entity-list-form-select]" - ) - - if dropdowns: - raise Exception("Dropdown not found") - - dropdown = dropdowns[0] - select = Select(dropdown) - options = [option.text for option in select.options] - - logger.debug(f"Dropdown options found: {options}") - - question_text = question.find_element(By.TAG_NAME, "label").text.lower() - logger.debug(f"Processing dropdown or combobox question: {question_text}") - - # current_selection = select.first_selected_option.text - # logger.debug(f"Current selection: {current_selection}") - - return SelectQuestion( - question=question_text, - options=options, - type=SelectQuestionType.SINGLE_SELECT, - ) - - except NoSuchElementException as e: - logger.error( - f"Failed to find dropdown question: {e} {traceback.format_exc()}" - ) - raise e - - def select_dropdown_option(self, section: WebElement, answer: str) -> None: - try: - question = section.find_element( - By.CLASS_NAME, "jobs-easy-apply-form-element" - ) - - dropdowns = question.find_elements(By.TAG_NAME, "select") - - if not dropdowns: - dropdowns = section.find_elements( - By.CSS_SELECTOR, "[data-test-text-entity-list-form-select]" - ) - - if dropdowns: - raise Exception("Dropdown not found") - - dropdown = dropdowns[0] - select = Select(dropdown) - options = [option.text for option in select.options] - - logger.debug(f"Dropdown options found: {options}") - - question_text = question.find_element(By.TAG_NAME, "label").text.lower() - logger.debug(f"Processing dropdown or combobox question: {question_text}") - - self._select_dropdown_option(dropdown, answer) - - except NoSuchElementException as e: - logger.error( - f"Failed to find dropdown question: {e} {traceback.format_exc()}" - ) - raise e - - def _select_dropdown_option(self, element: WebElement, text: str) -> None: - logger.debug(f"Selecting dropdown option: {text}") - select = Select(element) - select.select_by_visible_text(text) - - def discard(self) -> None: - logger.debug("Discarding application") - try: - self.driver.find_element(By.CLASS_NAME, "artdeco-modal__dismiss").click() - time_utils.medium_sleep() - self.driver.find_elements( - By.CLASS_NAME, "artdeco-modal__confirm-dialog-btn" - )[0].click() - time_utils.medium_sleep() - except Exception as e: - logger.warning(f"Failed to discard application: {e}") - - def save(self) -> None: - logger.debug( - "Application not completed. Saving job to My Jobs, In Progess section" - ) - try: - self.driver.find_element(By.CLASS_NAME, "artdeco-modal__dismiss").click() - time_utils.medium_sleep() - self.driver.find_elements( - By.CLASS_NAME, "artdeco-modal__confirm-dialog-btn" - )[1].click() - time_utils.medium_sleep() - except Exception as e: - logger.error(f"Failed to save application process: {e}") diff --git a/src/job_portals/linkedIn/easy_apply_job_page.py b/src/job_portals/linkedIn/easy_apply_job_page.py deleted file mode 100644 index 9c6d53608..000000000 --- a/src/job_portals/linkedIn/easy_apply_job_page.py +++ /dev/null @@ -1,238 +0,0 @@ -import random -import time -import traceback - -from httpx import get -from job import Job -from jobContext import JobContext -from job_portals.base_job_portal import BaseJobPage -from src.logging import logger -import utils -from utils import browser_utils -import utils.time_utils -from selenium.webdriver.remote.webelement import WebElement -from selenium.common.exceptions import TimeoutException, NoSuchElementException -from selenium.webdriver.support.wait import WebDriverWait -from selenium.webdriver.support import expected_conditions as EC -from selenium.webdriver.common.by import By -from selenium.webdriver.common.action_chains import ActionChains - - - -class LinkedInEasyApplyJobPage(BaseJobPage): - - def __init__(self, driver): - super().__init__(driver) - - def goto_job_page(self, job: Job): - try: - self.driver.get(job.link) - logger.debug(f"Navigated to job link: {job.link}") - except Exception as e: - logger.error(f"Failed to navigate to job link: {job.link}, error: {str(e)}") - raise e - - utils.time_utils.medium_sleep() - self.check_for_premium_redirect(job) - - def get_apply_button(self, job_context: JobContext) -> WebElement: - return self.get_easy_apply_button(job_context) - - def check_for_premium_redirect(self, job: Job, max_attempts=3): - - current_url = self.driver.current_url - attempts = 0 - - while "linkedin.com/premium" in current_url and attempts < max_attempts: - logger.warning( - "Redirected to linkedIn Premium page. Attempting to return to job page." - ) - attempts += 1 - - self.driver.get(job.link) - time.sleep(2) - current_url = self.driver.current_url - - if "linkedin.com/premium" in current_url: - logger.error( - f"Failed to return to job page after {max_attempts} attempts. Cannot apply for the job." - ) - raise Exception( - f"Redirected to linkedIn Premium page and failed to return after {max_attempts} attempts. Job application aborted." - ) - - def click_apply_button(self, job_context: JobContext) -> None: - easy_apply_button = self.get_easy_apply_button(job_context) - logger.debug("Attempting to click 'Easy Apply' button") - actions = ActionChains(self.driver) - actions.move_to_element(easy_apply_button).click().perform() - logger.debug("'Easy Apply' button clicked successfully") - - - - def get_easy_apply_button(self, job_context: JobContext) -> WebElement: - self.driver.execute_script("document.activeElement.blur();") - logger.debug("Focus removed from the active element") - - self.check_for_premium_redirect(job_context.job) - - easy_apply_button = self._find_easy_apply_button(job_context) - return easy_apply_button - - def _find_easy_apply_button(self, job_context: JobContext) -> WebElement: - logger.debug("Searching for 'Easy Apply' button") - attempt = 0 - - search_methods = [ - { - "description": "find all 'Easy Apply' buttons using find_elements", - "find_elements": True, - "xpath": '//button[contains(@class, "jobs-apply-button") and contains(., "Easy Apply")]', - }, - { - "description": "'aria-label' containing 'Easy Apply to'", - "xpath": '//button[contains(@aria-label, "Easy Apply to")]', - }, - { - "description": "button text search", - "xpath": '//button[contains(text(), "Easy Apply") or contains(text(), "Apply now")]', - }, - ] - - while attempt < 2: - self.check_for_premium_redirect(job_context.job) - self._scroll_page() - - for method in search_methods: - try: - logger.debug(f"Attempting search using {method['description']}") - - if method.get("find_elements"): - buttons = self.driver.find_elements(By.XPATH, method["xpath"]) - if buttons: - for index, button in enumerate(buttons): - try: - WebDriverWait(self.driver, 10).until( - EC.visibility_of(button) - ) - WebDriverWait(self.driver, 10).until( - EC.element_to_be_clickable(button) - ) - logger.debug( - f"Found 'Easy Apply' button {index + 1}, attempting to click" - ) - return button - except Exception as e: - logger.warning( - f"Button {index + 1} found but not clickable: {e}" - ) - else: - raise TimeoutException("No 'Easy Apply' buttons found") - else: - button = WebDriverWait(self.driver, 10).until( - EC.presence_of_element_located((By.XPATH, method["xpath"])) - ) - WebDriverWait(self.driver, 10).until(EC.visibility_of(button)) - WebDriverWait(self.driver, 10).until( - EC.element_to_be_clickable(button) - ) - logger.debug("Found 'Easy Apply' button, attempting to click") - return button - - except TimeoutException: - logger.warning( - f"Timeout during search using {method['description']}" - ) - except Exception as e: - logger.warning( - f"Failed to click 'Easy Apply' button using {method['description']} on attempt {attempt + 1}: {e}" - ) - - self.check_for_premium_redirect(job_context.job) - - if attempt == 0: - logger.debug("Refreshing page to retry finding 'Easy Apply' button") - self.driver.refresh() - time.sleep(random.randint(3, 5)) - attempt += 1 - - page_url = self.driver.current_url - logger.error( - f"No clickable 'Easy Apply' button found after 2 attempts. page url: {page_url}" - ) - raise Exception("No clickable 'Easy Apply' button found") - - def _scroll_page(self) -> None: - logger.debug("Scrolling the page") - scrollable_element = self.driver.find_element(By.TAG_NAME, "html") - browser_utils.scroll_slow( - self.driver, scrollable_element, step=300, reverse=False - ) - browser_utils.scroll_slow( - self.driver, scrollable_element, step=300, reverse=True - ) - - def get_job_description(self, job: Job) -> str: - self.check_for_premium_redirect(job) - logger.debug("Getting job description") - try: - try: - see_more_button = self.driver.find_element( - By.XPATH, '//button[@aria-label="Click to see more description"]' - ) - actions = ActionChains(self.driver) - actions.move_to_element(see_more_button).click().perform() - time.sleep(2) - except NoSuchElementException: - logger.debug("See more button not found, skipping") - - try: - description = self.driver.find_element( - By.CLASS_NAME, "jobs-description-content__text" - ).text - except NoSuchElementException: - logger.debug( - "First class not found, checking for second class for premium members" - ) - description = self.driver.find_element( - By.CLASS_NAME, "job-details-about-the-job-module__description" - ).text - - logger.debug("Job description retrieved successfully") - return description - except NoSuchElementException: - tb_str = traceback.format_exc() - logger.error(f"Job description not found: {tb_str}") - raise Exception(f"Job description not found: \nTraceback:\n{tb_str}") - except Exception: - tb_str = traceback.format_exc() - logger.error(f"Error getting Job description: {tb_str}") - raise Exception(f"Error getting Job description: \nTraceback:\n{tb_str}") - - def get_recruiter_link(self) -> str: - logger.debug("Getting job recruiter information") - try: - hiring_team_section = WebDriverWait(self.driver, 10).until( - EC.presence_of_element_located( - (By.XPATH, '//h2[text()="Meet the hiring team"]') - ) - ) - logger.debug("Hiring team section found") - - recruiter_elements = hiring_team_section.find_elements( - By.XPATH, './/following::a[contains(@href, "linkedin.com/in/")]' - ) - - if recruiter_elements: - recruiter_element = recruiter_elements[0] - recruiter_link = recruiter_element.get_attribute("href") - logger.debug( - f"Job recruiter link retrieved successfully: {recruiter_link}" - ) - return recruiter_link - else: - logger.debug("No recruiter link found in the hiring team section") - return "" - except Exception as e: - logger.warning(f"Failed to retrieve recruiter information: {e}") - return "" diff --git a/src/job_portals/linkedIn/jobs_page.py b/src/job_portals/linkedIn/jobs_page.py deleted file mode 100644 index 69a7756a5..000000000 --- a/src/job_portals/linkedIn/jobs_page.py +++ /dev/null @@ -1,218 +0,0 @@ -import re -import traceback -from constants import DATE_24_HOURS, DATE_ALL_TIME, DATE_MONTH, DATE_WEEK -from job import Job -from src.logging import logger -from job_portals.base_job_portal import BaseJobsPage -import urllib.parse -from selenium.common.exceptions import NoSuchElementException -from selenium.webdriver.common.by import By - -from utils import browser_utils - - -class LinkedInJobsPage(BaseJobsPage): - - def __init__(self, driver, parameters): - super().__init__(driver, parameters) - self.base_search_url = self.get_base_search_url() - - def next_job_page(self, position, location, page_number): - logger.debug( - f"Navigating to next job page: {position} in {location}, page {page_number}" - ) - encoded_position = urllib.parse.quote(position) - self.driver.get( - f"https://www.linkedin.com/jobs/search/{self.base_search_url}&keywords={encoded_position}{location}&start={page_number * 25}" - ) - - def job_tile_to_job(self, job_tile) -> Job: - logger.debug("Extracting job information from tile") - job = Job() - - try: - job.title = ( - job_tile.find_element(By.CLASS_NAME, "job-card-list__title") - .find_element(By.TAG_NAME, "strong") - .text - ) - logger.debug(f"Job title extracted: {job.title}") - except NoSuchElementException: - logger.warning("Job title is missing.") - - try: - job.link = ( - job_tile.find_element(By.CLASS_NAME, "job-card-list__title") - .get_attribute("href") - .split("?")[0] - ) - logger.debug(f"Job link extracted: {job.link}") - except NoSuchElementException: - logger.warning("Job link is missing.") - - try: - job.company = job_tile.find_element( - By.XPATH, - ".//div[contains(@class, 'artdeco-entity-lockup__subtitle')]//span", - ).text - logger.debug(f"Job company extracted: {job.company}") - except NoSuchElementException as e: - logger.warning(f"Job company is missing. {e} {traceback.format_exc()}") - - # Extract job ID from job url - try: - match = re.search(r"/jobs/view/(\d+)/", job.link) - if match: - job.id = match.group(1) - else: - logger.warning(f"Job ID not found in link: {job.link}") - ( - logger.debug(f"Job ID extracted: {job.id} from url:{job.link}") - if match - else logger.warning(f"Job ID not found in link: {job.link}") - ) - except Exception as e: - logger.warning(f"Failed to extract job ID: {e}", exc_info=True) - - try: - job.location = job_tile.find_element( - By.CLASS_NAME, "job-card-container__metadata-item" - ).text - except NoSuchElementException: - logger.warning("Job location is missing.") - - try: - job_state = job_tile.find_element( - By.XPATH, - ".//ul[contains(@class, 'job-card-list__footer-wrapper')]//li[contains(@class, 'job-card-container__apply-method')]", - ).text - except NoSuchElementException as e: - try: - # Fetching state when apply method is not found - job_state = job_tile.find_element( - By.XPATH, - ".//ul[contains(@class, 'job-card-list__footer-wrapper')]//li[contains(@class, 'job-card-container__footer-job-state')]", - ).text - job.apply_method = "Applied" - logger.warning( - f"Apply method not found, state {job_state}. {e} {traceback.format_exc()}" - ) - except NoSuchElementException as e: - logger.warning( - f"Apply method and state not found. {e} {traceback.format_exc()}" - ) - - return job - - def get_jobs_from_page(self, scroll=False): - - try: - no_jobs_element = self.driver.find_element( - By.CLASS_NAME, "jobs-search-two-pane__no-results-banner--expand" - ) - if ( - "No matching jobs found" in no_jobs_element.text - or "unfortunately, things aren" in self.driver.page_source.lower() - ): - logger.debug("No matching jobs found on this page, skipping.") - return [] - - except NoSuchElementException: - pass - - try: - # XPath query to find the ul tag with class scaffold-layout__list-container - jobs_xpath_query = ( - "//ul[contains(@class, 'scaffold-layout__list-container')]" - ) - jobs_container = self.driver.find_element(By.XPATH, jobs_xpath_query) - - if scroll: - jobs_container_scrolableElement = jobs_container.find_element( - By.XPATH, ".." - ) - logger.warning( - f"is scrollable: {browser_utils.is_scrollable(jobs_container_scrolableElement)}" - ) - - browser_utils.scroll_slow(self.driver, jobs_container_scrolableElement) - browser_utils.scroll_slow( - self.driver, jobs_container_scrolableElement, step=300, reverse=True - ) - - job_element_list = jobs_container.find_elements( - By.XPATH, - ".//li[contains(@class, 'jobs-search-results__list-item') and contains(@class, 'ember-view')]", - ) - - if not job_element_list: - logger.debug("No job class elements found on page, skipping.") - return [] - - return job_element_list - - except NoSuchElementException as e: - logger.warning( - f"No job results found on the page. \n expection: {traceback.format_exc()}" - ) - return [] - - except Exception as e: - logger.error( - f"Error while fetching job elements: {e} {traceback.format_exc()}" - ) - return [] - - def get_base_search_url(self): - parameters = self.parameters - logger.debug("Constructing linkedin base search URL") - url_parts = [] - working_type_filter = [] - if parameters.get("onsite") == True: - working_type_filter.append("1") - if parameters.get("remote") == True: - working_type_filter.append("2") - if parameters.get("hybrid") == True: - working_type_filter.append("3") - - if working_type_filter: - url_parts.append(f"f_WT={'%2C'.join(working_type_filter)}") - - experience_levels = [ - str(i + 1) - for i, (level, v) in enumerate( - parameters.get("experience_level", {}).items() - ) - if v - ] - if experience_levels: - url_parts.append(f"f_E={','.join(experience_levels)}") - url_parts.append(f"distance={parameters['distance']}") - job_types = [ - key[0].upper() - for key, value in parameters.get("jobTypes", {}).items() - if value - ] - if job_types: - url_parts.append(f"f_JT={','.join(job_types)}") - - date_param = next( - ( - v - for k, v in self.DATE_MAPPING.items() - if parameters.get("date", {}).get(k) - ), - "", - ) - url_parts.append("f_LF=f_AL") # Easy Apply - base_url = "&".join(url_parts) - full_url = f"?{base_url}{date_param}" - logger.debug(f"Base search URL constructed: {full_url}") - return full_url - - DATE_MAPPING = { - DATE_ALL_TIME: "", - DATE_MONTH: "&f_TPR=r2592000", - DATE_WEEK: "&f_TPR=r604800", - DATE_24_HOURS: "&f_TPR=r86400", - } diff --git a/src/job_portals/linkedIn/linkedin.py b/src/job_portals/linkedIn/linkedin.py deleted file mode 100644 index 801f0d416..000000000 --- a/src/job_portals/linkedIn/linkedin.py +++ /dev/null @@ -1,33 +0,0 @@ -import re -from job_portals.linkedIn.easy_application_page import LinkedInEasyApplicationPage -from job_portals.linkedIn.easy_apply_job_page import LinkedInEasyApplyJobPage -from src.job_portals.base_job_portal import BaseJobPortal -from src.job_portals.linkedIn.authenticator import LinkedInAuthenticator -from src.job_portals.linkedIn.jobs_page import LinkedInJobsPage - - - -class LinkedIn(BaseJobPortal): - - def __init__(self, driver, parameters): - self.driver = driver - self._authenticator = LinkedInAuthenticator(driver) - self._jobs_page = LinkedInJobsPage(driver, parameters) - self._application_page = LinkedInEasyApplicationPage(driver) - self._job_page = LinkedInEasyApplyJobPage(driver) - - @property - def jobs_page(self): - return self._jobs_page - - @property - def job_page(self): - return self._job_page - - @property - def authenticator(self): - return self._authenticator - - @property - def application_page(self): - return self._application_page \ No newline at end of file diff --git a/src/regex_utils.py b/src/regex_utils.py deleted file mode 100644 index 236e9b5f2..000000000 --- a/src/regex_utils.py +++ /dev/null @@ -1,24 +0,0 @@ -import re - -def look_ahead_patterns(keyword_list): - # Converts each blacklist entry to a regex pattern that ensures all words appear, in any order - # - # Example of pattern for job title: - # title_blacklist = ["Data Engineer", "Software Engineer"] - # patterns = ['(?=.*\\bData\\b)(?=.*\\bEngineer\\b)', '(?=.*\\bSoftware\\b)(?=.*\\bEngineer\\b)'] - # - # Description: - # '?=.*' => Regex expression that allows us to check if the following pattern appears - # somewhere in the string searched, even if there are any characters before the word - # '\b{WORD}\b' => Regex expression for a word boundry, that the WORD is treated as whole words - # rather than as parts of other words. - patterns = [] - for term in keyword_list: - # Split term into individual words - words = term.split() - # Create a lookahead for each word to ensure it appears independently - lookaheads = [fr"(?=.*\b{re.escape(word)}\b)" for word in words] - # Combine lookaheads with a pattern that allows flexible separators between the words - pattern = "".join(lookaheads) # Ensures all words are present - patterns.append(pattern) - return patterns \ No newline at end of file diff --git a/src/utils/chrome_utils.py b/src/utils/chrome_utils.py index 3d3a84ac3..285f9fa6a 100644 --- a/src/utils/chrome_utils.py +++ b/src/utils/chrome_utils.py @@ -1,24 +1,15 @@ import os +import time from selenium import webdriver +from selenium.webdriver.chrome.service import Service as ChromeService +from selenium.webdriver.chrome.options import Options +from webdriver_manager.chrome import ChromeDriverManager # Import webdriver_manager +import urllib from src.logging import logger -chromeProfilePath = os.path.join(os.getcwd(), "chrome_profile", "linkedin_profile") - -def ensure_chrome_profile(): - logger.debug(f"Ensuring Chrome profile exists at path: {chromeProfilePath}") - profile_dir = os.path.dirname(chromeProfilePath) - if not os.path.exists(profile_dir): - os.makedirs(profile_dir) - logger.debug(f"Created directory for Chrome profile: {profile_dir}") - if not os.path.exists(chromeProfilePath): - os.makedirs(chromeProfilePath) - logger.debug(f"Created Chrome profile directory: {chromeProfilePath}") - return chromeProfilePath - def chrome_browser_options(): logger.debug("Setting Chrome browser options") - ensure_chrome_profile() - options = webdriver.ChromeOptions() + options = Options() options.add_argument("--start-maximized") options.add_argument("--no-sandbox") options.add_argument("--disable-dev-shm-usage") @@ -37,24 +28,64 @@ def chrome_browser_options(): options.add_argument("--disable-plugins") options.add_argument("--disable-animations") options.add_argument("--disable-cache") - options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"]) - - prefs = { - "profile.default_content_setting_values.images": 2, - "profile.managed_default_content_settings.stylesheets": 2, - } - options.add_experimental_option("prefs", prefs) - - if len(chromeProfilePath) > 0: - initial_path = os.path.dirname(chromeProfilePath) - profile_dir = os.path.basename(chromeProfilePath) - options.add_argument('--user-data-dir=' + initial_path) - options.add_argument("--profile-directory=" + profile_dir) - logger.debug(f"Using Chrome profile directory: {chromeProfilePath}") - else: - options.add_argument("--incognito") - logger.debug("Using Chrome in incognito mode") - + options.add_argument("--incognito") + logger.debug("Using Chrome in incognito mode") + return options +def init_browser() -> webdriver.Chrome: + try: + options = chrome_browser_options() + # Use webdriver_manager to handle ChromeDriver + driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options) + logger.debug("Chrome browser initialized successfully.") + return driver + except Exception as e: + logger.error(f"Failed to initialize browser: {str(e)}") + raise RuntimeError(f"Failed to initialize browser: {str(e)}") + + + +def HTML_to_PDF(html_content, driver): + """ + Converte una stringa HTML in un PDF e restituisce il PDF come stringa base64. + + :param html_content: Stringa contenente il codice HTML da convertire. + :param driver: Istanza del WebDriver di Selenium. + :return: Stringa base64 del PDF generato. + :raises ValueError: Se l'input HTML non è una stringa valida. + :raises RuntimeError: Se si verifica un'eccezione nel WebDriver. + """ + # Validazione del contenuto HTML + if not isinstance(html_content, str) or not html_content.strip(): + raise ValueError("Il contenuto HTML deve essere una stringa non vuota.") + + # Codifica l'HTML in un URL di tipo data + encoded_html = urllib.parse.quote(html_content) + data_url = f"data:text/html;charset=utf-8,{encoded_html}" + + try: + driver.get(data_url) + # Attendi che la pagina si carichi completamente + time.sleep(2) # Potrebbe essere necessario aumentare questo tempo per HTML complessi + # Esegue il comando CDP per stampare la pagina in PDF + pdf_base64 = driver.execute_cdp_cmd("Page.printToPDF", { + "printBackground": True, # Includi lo sfondo nella stampa + "landscape": False, # Stampa in verticale (False per ritratto) + "paperWidth": 8.27, # Larghezza del foglio in pollici (A4) + "paperHeight": 11.69, # Altezza del foglio in pollici (A4) + "marginTop": 0.8, # Margine superiore in pollici (circa 2 cm) + "marginBottom": 0.8, # Margine inferiore in pollici (circa 2 cm) + "marginLeft": 0.5, # Margine sinistro in pollici (circa 1.27 cm) + "marginRight": 0.5, # Margine destro in pollici (circa 1.27 cm) + "displayHeaderFooter": False, # Non visualizzare intestazioni e piè di pagina + "preferCSSPageSize": True, # Preferire le dimensioni della pagina CSS + "generateDocumentOutline": False, # Non generare un sommario del documento + "generateTaggedPDF": False, # Non generare PDF taggato + "transferMode": "ReturnAsBase64" # Restituire il PDF come stringa base64 + }) + return pdf_base64['data'] + except Exception as e: + logger.error(f"Si è verificata un'eccezione WebDriver: {e}") + raise RuntimeError(f"Si è verificata un'eccezione WebDriver: {e}") diff --git a/constants.py b/src/utils/constants.py similarity index 98% rename from constants.py rename to src/utils/constants.py index 86ae70c68..54f34214e 100644 --- a/constants.py +++ b/src/utils/constants.py @@ -3,7 +3,6 @@ DATE_WEEK = "week" DATE_24_HOURS = "24_hours" -LINKEDIN = "linkedin" # constants used in application SECRETS_YAML = "secrets.yaml" diff --git a/src/utils/time_utils.py b/src/utils/time_utils.py deleted file mode 100644 index fd8e065cc..000000000 --- a/src/utils/time_utils.py +++ /dev/null @@ -1,10 +0,0 @@ -import random -import time - - -def short_sleep() -> None: - time.sleep(random.uniform(1.2, 3)) - - -def medium_sleep() -> None: - time.sleep(random.uniform(3, 5)) \ No newline at end of file diff --git a/tests/test_aihawk_bot_facade.py b/tests/test_aihawk_bot_facade.py deleted file mode 100644 index edccf6278..000000000 --- a/tests/test_aihawk_bot_facade.py +++ /dev/null @@ -1,14 +0,0 @@ -import pytest -# from src.aihawk_job_manager import JobManager - -@pytest.fixture -def job_manager(): - """Fixture for JobManager.""" - return None # Replace with valid instance or mock later - -def test_bot_functionality(job_manager): - """Test AIHawk bot facade.""" - # Example: test job manager interacts with the bot facade correctly - job = {"title": "Software Engineer"} - # job_manager.some_method_to_apply(job) - assert job is not None # Placeholder for actual test diff --git a/tests/test_aihawk_easy_applier.py b/tests/test_aihawk_easy_applier.py deleted file mode 100644 index 7329c835c..000000000 --- a/tests/test_aihawk_easy_applier.py +++ /dev/null @@ -1,99 +0,0 @@ -# import pytest -# from unittest import mock - -# from ai_hawk.job_applier import AIHawkJobApplier - - - -# @pytest.fixture -# def mock_driver(): -# """Fixture to mock Selenium WebDriver.""" -# return mock.Mock() - - -# @pytest.fixture -# def mock_gpt_answerer(): -# """Fixture to mock GPT Answerer.""" -# return mock.Mock() - - -# @pytest.fixture -# def mock_resume_generator_manager(): -# """Fixture to mock Resume Generator Manager.""" -# return mock.Mock() - - -# @pytest.fixture -# def easy_applier(mock_driver, mock_gpt_answerer, mock_resume_generator_manager): -# """Fixture to initialize AIHawkEasyApplier with mocks.""" -# return AIHawkJobApplier( -# driver=mock_driver, -# resume_dir="/path/to/resume", -# set_old_answers=[('Question 1', 'Answer 1', 'Type 1')], -# gpt_answerer=mock_gpt_answerer, -# resume_generator_manager=mock_resume_generator_manager -# ) - - -# def test_initialization(mocker, easy_applier): -# """Test that AIHawkEasyApplier is initialized correctly.""" -# # Mock os.path.exists to return True -# mocker.patch('os.path.exists', return_value=True) - -# easy_applier = AIHawkJobApplier( -# driver=mocker.Mock(), -# resume_dir="/path/to/resume", -# set_old_answers=[('Question 1', 'Answer 1', 'Type 1')], -# gpt_answerer=mocker.Mock(), -# resume_generator_manager=mocker.Mock() -# ) - -# assert easy_applier.resume_path == "/path/to/resume" -# assert len(easy_applier.set_old_answers) == 1 -# assert easy_applier.gpt_answerer is not None -# assert easy_applier.resume_generator_manager is not None - - -# def test_apply_to_job_success(mocker, easy_applier): -# """Test successfully applying to a job.""" -# mock_job = mock.Mock() - -# # Mock job_apply so we don't actually try to apply -# mocker.patch.object(easy_applier, 'job_apply') - -# easy_applier.apply_to_job(mock_job) -# easy_applier.job_apply.assert_called_once_with(mock_job) - - -# def test_apply_to_job_failure(mocker, easy_applier): -# """Test failure while applying to a job.""" -# mock_job = mock.Mock() -# mocker.patch.object(easy_applier, 'job_apply', -# side_effect=Exception("Test error")) - -# with pytest.raises(Exception, match="Test error"): -# easy_applier.apply_to_job(mock_job) - -# easy_applier.job_apply.assert_called_once_with(mock_job) - - -# def test_check_for_premium_redirect_no_redirect(mocker, easy_applier): -# """Test that check_for_premium_redirect works when there's no redirect.""" -# mock_job = mock.Mock() -# easy_applier.driver.current_url = "https://www.linkedin.com/jobs/view/1234" - -# easy_applier.check_for_premium_redirect(mock_job) -# easy_applier.driver.get.assert_not_called() - - -# def test_check_for_premium_redirect_with_redirect(mocker, easy_applier): -# """Test that check_for_premium_redirect handles linkedin Premium redirects.""" -# mock_job = mock.Mock() -# easy_applier.driver.current_url = "https://www.linkedin.com/premium" -# mock_job.link = "https://www.linkedin.com/jobs/view/1234" - -# with pytest.raises(Exception, match="Redirected to linkedIn Premium page and failed to return after 3 attempts. Job application aborted."): -# easy_applier.check_for_premium_redirect(mock_job) - -# # Verify that it attempted to return to the job page 3 times -# assert easy_applier.driver.get.call_count == 3 diff --git a/tests/test_aihawk_job_manager.py b/tests/test_aihawk_job_manager.py deleted file mode 100644 index de09a097d..000000000 --- a/tests/test_aihawk_job_manager.py +++ /dev/null @@ -1,185 +0,0 @@ -# import json -# import re -# from src.job import Job -# from unittest import mock -# from pathlib import Path -# import os -# import pytest -# from ai_hawk.job_manager import AIHawkJobManager -# from selenium.common.exceptions import NoSuchElementException -# from src.logging import logger - - -# @pytest.fixture -# def job_manager(mocker): -# """Fixture to create a AIHawkJobManager instance with mocked driver.""" -# mock_driver = mocker.Mock() -# return AIHawkJobManager(mock_driver) - - -# def test_initialization(job_manager): -# """Test AIHawkJobManager initialization.""" -# assert job_manager.driver is not None -# assert job_manager.set_old_answers == set() -# assert job_manager.easy_applier_component is None - - -# def test_set_parameters(mocker, job_manager): -# """Test setting parameters for the AIHawkJobManager.""" -# # Mocking os.path.exists to return True for the resume path -# mocker.patch('pathlib.Path.exists', return_value=True) - -# params = { -# 'company_blacklist': ['Company A', 'Company B'], -# 'title_blacklist': ['Intern', 'Junior'], -# 'positions': ['Software Engineer', 'Data Scientist'], -# 'locations': ['New York', 'San Francisco'], -# 'apply_once_at_company': True, -# 'uploads': {'resume': '/path/to/resume'}, # Resume path provided here -# 'outputFileDirectory': '/path/to/output', -# 'job_applicants_threshold': { -# 'min_applicants': 5, -# 'max_applicants': 50 -# }, -# 'remote': False, -# 'distance': 50, -# 'date': {'all_time': True} -# } - -# job_manager.set_parameters(params) - -# # Normalize paths to handle platform differences (e.g., Windows vs Unix-like systems) -# assert str(job_manager.resume_path) == os.path.normpath('/path/to/resume') -# assert str(job_manager.output_file_directory) == os.path.normpath( -# '/path/to/output') - - -# def next_job_page(self, position, location, job_page): -# logger.debug(f"Navigating to next job page: {position} in {location}, page {job_page}") -# self.driver.get( -# f"https://www.linkedin.com/jobs/search/{self.base_search_url}&keywords={position}&location={location}&start={job_page * 25}") - - -# def test_get_jobs_from_page_no_jobs(mocker, job_manager): -# """Test get_jobs_from_page when no jobs are found.""" -# mocker.patch.object(job_manager.driver, 'find_element', -# side_effect=NoSuchElementException) - -# jobs = job_manager.get_jobs_from_page() -# assert jobs == [] - - -# def test_get_jobs_from_page_with_jobs(mocker, job_manager): -# """Test get_jobs_from_page when job elements are found.""" -# # Mock no_jobs_element to simulate the absence of "No matching jobs found" banner -# no_jobs_element_mock = mocker.Mock() -# no_jobs_element_mock.text = "" # Empty text means "No matching jobs found" is not present - -# # Mock the driver to simulate the page source -# mocker.patch.object(job_manager.driver, 'page_source', return_value="") - -# # Mock the outer find_element -# container_mock = mocker.Mock() - -# # Mock the inner find_elements to return job list items -# job_element_mock = mocker.Mock() -# # Simulating two job items -# job_elements_list = [job_element_mock, job_element_mock] - -# # Return the container mock, which itself returns the job elements list -# container_mock.find_elements.return_value = job_elements_list -# mocker.patch.object(job_manager.driver, 'find_element', side_effect=[ -# no_jobs_element_mock, -# container_mock -# ]) - -# job_manager.get_jobs_from_page() - -# assert job_manager.driver.find_element.call_count == 2 -# assert container_mock.find_elements.call_count == 1 - - - -# def test_apply_jobs_with_no_jobs(mocker, job_manager): -# """Test apply_jobs when no jobs are found.""" -# # Mocking find_element to return a mock element that simulates no jobs -# mock_element = mocker.Mock() -# mock_element.text = "No matching jobs found" - -# # Mock the driver to return the mock element when find_element is called -# mocker.patch.object(job_manager.driver, 'find_element', -# return_value=mock_element) - -# # Call apply_jobs and ensure no exceptions are raised -# job_manager.apply_jobs() - -# # Ensure it attempted to find the job results list -# assert job_manager.driver.find_element.call_count == 1 - - -# def test_apply_jobs_with_jobs(mocker, job_manager): -# """Test apply_jobs when jobs are present.""" - -# # Mock the page_source to simulate what the page looks like when jobs are present -# mocker.patch.object(job_manager.driver, 'page_source', -# return_value="some job content") - -# # Simulating two job elements -# job_element_mock = mocker.Mock() -# job_elements_list = [job_element_mock, job_element_mock] - -# mocker.patch.object(job_manager, 'get_jobs_from_page', return_value=job_elements_list) - -# job = Job( -# title="Title", -# company="Company", -# location="Location", -# apply_method="", -# link="Link" -# ) - -# # Mock the extract_job_information_from_tile method to return sample job info -# mocker.patch.object(job_manager, 'job_tile_to_job', return_value=job) - -# # Mock other methods like is_blacklisted, is_already_applied_to_job, and is_already_applied_to_company -# mocker.patch.object(job_manager, 'is_blacklisted', return_value=False) -# mocker.patch.object( -# job_manager, 'is_already_applied_to_job', return_value=False) -# mocker.patch.object( -# job_manager, 'is_already_applied_to_company', return_value=False) - -# # Mock the AIHawkEasyApplier component -# job_manager.easy_applier_component = mocker.Mock() - -# # Mock the output_file_directory as a valid Path object -# job_manager.output_file_directory = Path("/mocked/path/to/output") - -# # Mock Path.exists() to always return True (so no actual file system interaction is needed) -# mocker.patch.object(Path, 'exists', return_value=True) - -# # Mock the open function to prevent actual file writing -# failed_mock_data = [{ -# "company": "TestCompany", -# "job_title": "Test Data Engineer", -# "link": "https://www.example.com/jobs/view/1234567890/", -# "job_recruiter": "", -# "job_location": "Anywhere (Remote)", -# "pdf_path": "file:///mocked/path/to/pdf" -# }] - -# # Serialize the dictionary to a JSON string -# json_read_data = json.dumps(failed_mock_data) - -# mock_open = mocker.mock_open(read_data=json_read_data) -# mocker.patch('builtins.open', mock_open) - -# # Run the apply_jobs method -# job_manager.apply_jobs() - -# # Assertions -# assert job_manager.get_jobs_from_page.call_count == 1 -# # Called for each job element -# assert job_manager.job_tile_to_job.call_count == 2 -# # Called for each job element -# assert job_manager.easy_applier_component.job_apply.call_count == 2 -# mock_open.assert_called() # Ensure that the open function was called diff --git a/tests/test_job_application_profile.py b/tests/test_job_application_profile.py deleted file mode 100644 index f59ac3a9d..000000000 --- a/tests/test_job_application_profile.py +++ /dev/null @@ -1,185 +0,0 @@ -import pytest -from src.job_application_profile import JobApplicationProfile - -@pytest.fixture -def valid_yaml(): - """Valid YAML string for initializing JobApplicationProfile.""" - return """ - self_identification: - gender: Male - pronouns: He/Him - veteran: No - disability: No - ethnicity: Asian - legal_authorization: - eu_work_authorization: "Yes" - us_work_authorization: "Yes" - requires_us_visa: "No" - requires_us_sponsorship: "Yes" - requires_eu_visa: "No" - legally_allowed_to_work_in_eu: "Yes" - legally_allowed_to_work_in_us: "Yes" - requires_eu_sponsorship: "No" - canada_work_authorization: "Yes" - requires_canada_visa: "No" - legally_allowed_to_work_in_canada: "Yes" - requires_canada_sponsorship: "No" - uk_work_authorization: "Yes" - requires_uk_visa: "No" - legally_allowed_to_work_in_uk: "Yes" - requires_uk_sponsorship: "No" - work_preferences: - remote_work: "Yes" - in_person_work: "No" - open_to_relocation: "Yes" - willing_to_complete_assessments: "Yes" - willing_to_undergo_drug_tests: "Yes" - willing_to_undergo_background_checks: "Yes" - availability: - notice_period: "2 weeks" - salary_expectations: - salary_range_usd: "80000-120000" - """ - -@pytest.fixture -def missing_field_yaml(): - """YAML string missing a required field (self_identification).""" - return """ - legal_authorization: - eu_work_authorization: "Yes" - us_work_authorization: "Yes" - requires_us_visa: "No" - requires_us_sponsorship: "Yes" - requires_eu_visa: "No" - legally_allowed_to_work_in_eu: "Yes" - legally_allowed_to_work_in_us: "Yes" - requires_eu_sponsorship: "No" - canada_work_authorization: "Yes" - requires_canada_visa: "No" - legally_allowed_to_work_in_canada: "Yes" - requires_canada_sponsorship: "No" - uk_work_authorization: "Yes" - requires_uk_visa: "No" - legally_allowed_to_work_in_uk: "Yes" - requires_uk_sponsorship: "No" - work_preferences: - remote_work: "Yes" - in_person_work: "No" - open_to_relocation: "Yes" - willing_to_complete_assessments: "Yes" - willing_to_undergo_drug_tests: "Yes" - willing_to_undergo_background_checks: "Yes" - availability: - notice_period: "2 weeks" - salary_expectations: - salary_range_usd: "80000-120000" - """ - -@pytest.fixture -def invalid_type_yaml(): - """YAML string with an invalid type for a field.""" - return """ - self_identification: - gender: Male - pronouns: He/Him - veteran: No - disability: No - ethnicity: Asian - legal_authorization: - eu_work_authorization: "Yes" - us_work_authorization: "Yes" - requires_us_visa: "No" - requires_us_sponsorship: "Yes" - requires_eu_visa: "No" - legally_allowed_to_work_in_eu: "Yes" - legally_allowed_to_work_in_us: "Yes" - requires_eu_sponsorship: "No" - canada_work_authorization: "Yes" - requires_canada_visa: "No" - legally_allowed_to_work_in_canada: "Yes" - requires_canada_sponsorship: "No" - uk_work_authorization: "Yes" - requires_uk_visa: "No" - legally_allowed_to_work_in_uk: "Yes" - requires_uk_sponsorship: "No" - work_preferences: - remote_work: 12345 # Invalid type, expecting a string - in_person_work: "No" - open_to_relocation: "Yes" - willing_to_complete_assessments: "Yes" - willing_to_undergo_drug_tests: "Yes" - willing_to_undergo_background_checks: "Yes" - availability: - notice_period: "2 weeks" - salary_expectations: - salary_range_usd: "80000-120000" - """ - -def test_initialize_with_valid_yaml(valid_yaml): - """Test initializing JobApplicationProfile with valid YAML.""" - profile = JobApplicationProfile(valid_yaml) - - # Check that the profile fields are correctly initialized - assert profile.self_identification.gender == "Male" - assert profile.self_identification.pronouns == "He/Him" - assert profile.legal_authorization.eu_work_authorization == "Yes" - assert profile.work_preferences.remote_work == "Yes" - assert profile.availability.notice_period == "2 weeks" - assert profile.salary_expectations.salary_range_usd == "80000-120000" - -def test_initialize_with_missing_field(missing_field_yaml): - """Test initializing JobApplicationProfile with missing required fields.""" - with pytest.raises(KeyError) as excinfo: - JobApplicationProfile(missing_field_yaml) - assert "self_identification" in str(excinfo.value) - -def test_initialize_with_invalid_yaml(): - """Test initializing JobApplicationProfile with invalid YAML.""" - invalid_yaml_str = """ - self_identification: - gender: Male - pronouns: He/Him - veteran: No - disability: No - ethnicity: Asian - legal_authorization: - eu_work_authorization: "Yes" - us_work_authorization: "Yes" - requires_us_visa: "No" - requires_us_sponsorship: "Yes" - requires_eu_visa: "No" - legally_allowed_to_work_in_eu: "Yes" - legally_allowed_to_work_in_us: "Yes" - requires_eu_sponsorship: "No" - canada_work_authorization: "Yes" - requires_canada_visa: "No" - legally_allowed_to_work_in_canada: "Yes" - requires_canada_sponsorship: "No" - uk_work_authorization: "Yes" - requires_uk_visa: "No" - legally_allowed_to_work_in_uk: "Yes" - requires_uk_sponsorship: "No" - work_preferences: - remote_work: "Yes" - in_person_work: "No" - availability: - notice_period: "2 weeks" - salary_expectations: - salary_range_usd: "80000-120000" - """ # Missing fields in work_preferences - - with pytest.raises(TypeError): - JobApplicationProfile(invalid_yaml_str) - -def test_str_representation(valid_yaml): - """Test the string representation of JobApplicationProfile.""" - profile = JobApplicationProfile(valid_yaml) - profile_str = str(profile) - - assert "Self Identification:" in profile_str - assert "Legal Authorization:" in profile_str - assert "Work Preferences:" in profile_str - assert "Availability:" in profile_str - assert "Salary Expectations:" in profile_str - assert "Male" in profile_str - assert "80000-120000" in profile_str diff --git a/tests/test_linkedIn_authenticator.py b/tests/test_linkedIn_authenticator.py deleted file mode 100644 index af2a5757b..000000000 --- a/tests/test_linkedIn_authenticator.py +++ /dev/null @@ -1,105 +0,0 @@ -from httpx import get -from numpy import place -import pytest -from selenium.webdriver.common.by import By -from selenium.webdriver.support.wait import WebDriverWait -from selenium.webdriver.support import expected_conditions as EC -from ai_hawk.authenticator import AIHawkAuthenticator -from selenium.common.exceptions import NoSuchElementException, TimeoutException - -from job_portals.base_job_portal import get_authenticator -from job_portals.linkedIn.authenticator import LinkedInAuthenticator - - - - -@pytest.fixture -def mock_driver(mocker): - """Fixture to mock the Selenium WebDriver.""" - return mocker.Mock() - - -@pytest.fixture -def authenticator(mock_driver): - """Fixture to initialize AIHawkAuthenticator with a mocked driver.""" - return get_authenticator(mock_driver, platform='linkedin') - - -def test_handle_login(mocker, authenticator): - """Test handling the AIHawk login process.""" - mocker.patch.object(authenticator.driver, 'get') - mocker.patch.object(authenticator, 'prompt_for_credentials') - mocker.patch.object(authenticator, 'handle_security_checks') - - # Mock current_url as a regular return value, not PropertyMock - mocker.patch.object(authenticator.driver, 'current_url', - return_value='https://www.linkedin.com/login') - - authenticator.handle_login() - - authenticator.driver.get.assert_called_with( - 'https://www.linkedin.com/login') - authenticator.prompt_for_credentials.assert_called_once() - authenticator.handle_security_checks.assert_called_once() - - -def test_enter_credentials_success(mocker, authenticator): - """Test entering credentials.""" - email_mock = mocker.Mock() - password_mock = mocker.Mock() - - mocker.patch.object(WebDriverWait, 'until', return_value=email_mock) - mocker.patch.object(authenticator.driver, 'find_element', - return_value=password_mock) - -def test_is_logged_in_true(mock_driver): - # Mock the current_url to simulate a logged-in state - mock_driver.current_url = "https://www.linkedin.com/feed/" - authenticator = LinkedInAuthenticator(mock_driver) - - assert authenticator.is_logged_in == True - -def test_is_logged_in_false(mock_driver): - # Mock the current_url to simulate a logged-out state - mock_driver.current_url = "https://www.linkedin.com/login" - authenticator = LinkedInAuthenticator(mock_driver) - - assert authenticator.is_logged_in == False - -def test_is_logged_in_partial_keyword(mock_driver): - # Mock the current_url to simulate a URL containing a keyword but not logged in - mock_driver.current_url = "https://www.linkedin.com/jobs/search/" - authenticator = LinkedInAuthenticator(mock_driver) - - assert authenticator.is_logged_in == True - -def test_is_logged_in_no_linkedin(mock_driver): - # Mock the current_url to simulate a URL not related to LinkedIn - mock_driver.current_url = "https://www.example.com/feed/" - authenticator = LinkedInAuthenticator(mock_driver) - - assert authenticator.is_logged_in == False - - -def test_handle_security_check_success(mocker, authenticator): - """Test handling security check successfully.""" - mocker.patch.object(WebDriverWait, 'until', side_effect=[ - mocker.Mock(), # Security checkpoint detection - mocker.Mock() # Security check completion - ]) - - authenticator.handle_security_checks() - - # Verify WebDriverWait is called with EC.url_contains for both the challenge and feed - WebDriverWait(authenticator.driver, 10).until.assert_any_call(mocker.ANY) - WebDriverWait(authenticator.driver, 300).until.assert_any_call(mocker.ANY) - - -def test_handle_security_check_timeout(mocker, authenticator): - """Test handling security check timeout.""" - mocker.patch.object(WebDriverWait, 'until', side_effect=TimeoutException) - - authenticator.handle_security_checks() - - # Verify WebDriverWait is called with EC.url_contains for the challenge - WebDriverWait(authenticator.driver, 10).until.assert_any_call(mocker.ANY) diff --git a/tests/test_regex_utils.py b/tests/test_regex_utils.py deleted file mode 100644 index ae51f2fd5..000000000 --- a/tests/test_regex_utils.py +++ /dev/null @@ -1,55 +0,0 @@ -import pytest -from ai_hawk.job_manager import AIHawkJobManager -from src.regex_utils import look_ahead_patterns - -apply_component = AIHawkJobManager(None) # For this test we dont need the web driver - -# Test title, company and location blacklist definition -title_blacklist = ["Data Engineer", "Software Engineer"] -company_blacklist = ["ABC Corp", "XYZ Inc"] -location_blacklist = ["Brazil"] -seen_jobs = set() - -# Creating regex patterns -apply_component.title_blacklist_patterns = look_ahead_patterns(title_blacklist) -apply_component.company_blacklist_patterns = look_ahead_patterns(company_blacklist) -apply_component.location_blacklist_patterns = look_ahead_patterns(location_blacklist) -apply_component.seen_jobs = seen_jobs -apply_component.seen_jobs.add("link14") # added link for 'seen link' test - -test_cases = [ - # Blacklist matches for "Data Engineer" in various forms - ("Data Engineer", "Tech Corp", "link1", "USA", True), # Exact match (blacklist) - ("Data Engineer (Gen AI)", "Tech Corp", "link2", "USA", True), # Partial match with parentheses (blacklist) - ("Senior Data Engineer", "Tech Corp", "link3", "USA", True), # Partial match with prefix (blacklist) - ("Engineer, Data", "Tech Corp", "link4", "USA", True), # Words reordered (blacklist) - ("Data-Engineer", "Tech Corp", "link5", "USA", True), # Hyphenated (blacklist) - ("Data & Engineer", "Tech Corp", "link6", "USA", True), # Ampersand separator (blacklist) - - # Blacklist matches for "Brazil" in location in various forms - ("Project Manager", "Tech Corp", "link7", "Brazil", True), # Exact match (blacklist) - ("Project Manager", "Tech Corp", "link8", "Rio de Janeiro, Brazil", True), # Location with city and country (blacklist) - ("Project Manager", "Tech Corp", "link9", "São Paulo - Brazil", True), # Location with hyphen separator (blacklist) - ("Project Manager", "Tech Corp", "link10", "Brazil, South America", True), # Location with continent (blacklist) - - # Blacklist matches for "ABC Corp" in various forms - ("Marketing Specialist", "ABC Corp", "link11", "USA", True), # Exact match (blacklist) - ("Marketing Specialist", "ABC Corporation", "link12", "USA", False), # Variants on corporation, part of a different word - ("Marketing Specialist", "ABC CORP", "link13", "USA", True), # Uppercase variant (blacklist) - - # Seen job link test - ("Marketing Specialist", "DEF Corp", "link14", "USA", True), # Link has been seen (blacklist) - - # Cases that should NOT be blacklisted (expected to pass) - ("Software Developer", "Tech Corp", "link15", "USA", False), # Title not blacklisted - ("Product Engineer", "XYZ Ltd", "link16", "Canada", False), # Title and location not blacklisted - ("Data Science Specialist", "DEF Corp", "link17", "USA", False), # Title similar but not matching blacklist - ("Project Manager", "GHI Inc", "link18", "Argentina", False), # Location close to blacklist but distinct - ("Operations Manager", "ABC Technology", "link19", "USA", False) # Company name similar but not matching -] - -@pytest.mark.parametrize("job_title, company, link, job_location, expected_output", test_cases) -def test_is_blacklisted(job_title, company, link, job_location, expected_output): - actual_output = apply_component.is_blacklisted(job_title, company, link, job_location) - - assert actual_output == expected_output, f"Failed for case: {job_title} at {company} in {job_location} (link: {link})" diff --git a/tests/test_utils.py b/tests/test_utils.py deleted file mode 100644 index 2ca828b44..000000000 --- a/tests/test_utils.py +++ /dev/null @@ -1,86 +0,0 @@ -# tests/test_utils.py -import pytest -import os -import time -from unittest import mock -from selenium.webdriver.remote.webelement import WebElement -from src.utils.browser_utils import is_scrollable, scroll_slow -from src.utils.chrome_utils import chrome_browser_options, ensure_chrome_profile - -# Mocking logging to avoid actual file writing -@pytest.fixture(autouse=True) -def mock_logger(mocker): - mocker.patch("src.logging.logger") - -# Test ensure_chrome_profile function -def test_ensure_chrome_profile(mocker): - mocker.patch("os.path.exists", return_value=False) # Pretend directory doesn't exist - mocker.patch("os.makedirs") # Mock making directories - - # Call the function - profile_path = ensure_chrome_profile() - - # Verify that os.makedirs was called twice to create the directory - assert profile_path.endswith("linkedin_profile") - assert os.path.exists.called - assert os.makedirs.called - -# Test is_scrollable function -def test_is_scrollable(mocker): - mock_element = mocker.Mock(spec=WebElement) - mock_element.get_attribute.side_effect = lambda attr: "1000" if attr == "scrollHeight" else "500" - - # Call the function - scrollable = is_scrollable(mock_element) - - # Check the expected outcome - assert scrollable is True - mock_element.get_attribute.assert_any_call("scrollHeight") - mock_element.get_attribute.assert_any_call("clientHeight") - -# Test scroll_slow function -def test_scroll_slow(mocker): - mock_driver = mocker.Mock() - mock_element = mocker.Mock(spec=WebElement) - - # Mock element's attributes for scrolling - mock_element.get_attribute.side_effect = lambda attr: "2000" if attr == "scrollHeight" else "0" - mock_element.is_displayed.return_value = True - mocker.patch("time.sleep") # Mock time.sleep to avoid waiting - - # Call the function - scroll_slow(mock_driver, mock_element, start=0, end=1000, step=100, reverse=False) - - # Ensure that scrolling happened multiple times - assert mock_driver.execute_script.called - mock_element.is_displayed.assert_called_once() - -def test_scroll_slow_element_not_scrollable(mocker): - mock_driver = mocker.Mock() - mock_element = mocker.Mock(spec=WebElement) - - # Mock the attributes so the element is not scrollable - mock_element.get_attribute.side_effect = lambda attr: "1000" if attr == "scrollHeight" else "1000" - mock_element.is_displayed.return_value = True - - scroll_slow(mock_driver, mock_element, start=0, end=1000, step=100) - - # Ensure it detected non-scrollable element - mock_driver.execute_script.assert_not_called() - -# Test chrome_browser_options function -def test_chrome_browser_options(mocker): - mocker.patch("src.utils.chrome_utils.ensure_chrome_profile") - mocker.patch("os.path.dirname", return_value="/mocked/path") - mocker.patch("os.path.basename", return_value="profile_directory") - - mock_options = mocker.Mock() - - mocker.patch("selenium.webdriver.ChromeOptions", return_value=mock_options) - - # Call the function - options = chrome_browser_options() - - # Ensure options were set - assert mock_options.add_argument.called - assert options == mock_options