diff --git a/config.yaml b/config.yaml
index a4f5a8be..265e0d6f 100644
--- a/config.yaml
+++ b/config.yaml
@@ -20,10 +20,10 @@ sources:
   - agora_proteomics:
     agora_proteomics_files: &agora_proteomics_files
       - name: proteomics
-        id: syn18689335.3
+        id: syn18689335.4
         format: csv
     agora_proteomics_provenance: &agora_proteomics_provenance
-      - syn18689335.3
+      - syn18689335.4
   - agora_proteomics_tmt:
     agora_proteomics_tmt_files: &agora_proteomics_tmt_files
       - name: proteomics_tmt
@@ -89,6 +89,7 @@ datasets:
   - proteomics:
       files: *agora_proteomics_files
       final_format: json
+      custom_transformations: 1
       provenance: *agora_proteomics_provenance
       column_rename:
         genename: hgnc_symbol
@@ -99,6 +100,7 @@ datasets:
   - proteomics_tmt:
       files: *agora_proteomics_tmt_files
       final_format: json
+      custom_transformations: 1
       provenance: *agora_proteomics_tmt_provenance
       column_rename:
         genename: hgnc_symbol
@@ -109,6 +111,7 @@ datasets:
   - proteomics_srm:
       files: *agora_proteomics_srm_files
       final_format: json
+      custom_transformations: 1
       provenance: *agora_proteomics_srm_provenance
       column_rename:
         genename: hgnc_symbol
@@ -290,6 +293,7 @@ datasets:
       custom_transformations: 1
       provenance: *rna_diff_expr_data_provenance
       destination: *dest
+      gx_enabled: true
 
   - proteomics_distribution_data:
       files:
diff --git a/gx_suite_definitions/rna_distributinon_data.ipynb b/gx_suite_definitions/rna_distributinon_data.ipynb
new file mode 100644
index 00000000..1e642889
--- /dev/null
+++ b/gx_suite_definitions/rna_distributinon_data.ipynb
@@ -0,0 +1,243 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import synapseclient\n",
+    "\n",
+    "import great_expectations as gx\n",
+    "\n",
+    "context = gx.get_context(project_root_dir='../src/agoradatatools/great_expectations')\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Create Expectation Suite for RNA Distribution Data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Get Example Data File"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "syn = synapseclient.Synapse()\n",
+    "syn.login()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rna_distribution_data_file = syn.get(\"syn28094691\").path\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Create Validator Object on Data File"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "validator = context.sources.pandas_default.read_json(\n",
+    "    rna_distribution_data_file\n",
+    ")\n",
+    "validator.expectation_suite_name = \"rna_distribution_data\"\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Add Expectations to Validator Object For Each Column"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# model\n",
+    "validator.expect_column_values_to_be_of_type(\"model\", \"str\")\n",
+    "validator.expect_column_values_to_not_be_null(\"model\")\n",
+    "validator.expect_column_values_to_be_in_set(\"model\", [\"AD Diagnosis (males and females)\", \"AD Diagnosis x AOD (males and females)\",\"AD Diagnosis x Sex (females only)\", \"AD Diagnosis x Sex (males only)\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# tissue\n",
+    "validator.expect_column_values_to_be_of_type(\"tissue\", \"str\")\n",
+    "validator.expect_column_values_to_not_be_null(\"tissue\")\n",
+    "validator.expect_column_values_to_be_in_set(\"tissue\", [\"CBE\", \"DLPFC\", \"FP\", \"IFG\", \"PHG\", \"STG\", \"TCX\", \"ACC\", \"PCC\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# min\n",
+    "validator.expect_column_values_to_be_of_type(\"min\", \"float\")\n",
+    "validator.expect_column_values_to_not_be_null(\"min\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# max\n",
+    "validator.expect_column_values_to_be_of_type(\"max\", \"float\")\n",
+    "validator.expect_column_values_to_not_be_null(\"max\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# median\n",
+    "validator.expect_column_values_to_be_of_type(\"median\", \"float\")\n",
+    "validator.expect_column_values_to_not_be_null(\"median\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# first_quartile\n",
+    "validator.expect_column_values_to_be_of_type(\"first_quartile\", \"float\")\n",
+    "validator.expect_column_values_to_not_be_null(\"first_quartile\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# third_quartile\n",
+    "validator.expect_column_values_to_be_of_type(\"third_quartile\", \"float\")\n",
+    "validator.expect_column_values_to_not_be_null(\"third_quartile\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# multi-field\n",
+    "validator.expect_column_pair_values_a_to_be_greater_than_b(\"max\", \"third_quartile\")\n",
+    "validator.expect_column_pair_values_a_to_be_greater_than_b(\"third_quartile\", \"median\")\n",
+    "validator.expect_column_pair_values_a_to_be_greater_than_b(\"median\", \"first_quartile\")\n",
+    "validator.expect_column_pair_values_a_to_be_greater_than_b(\"first_quartile\", \"min\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Save Expectation Suite"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "validator.save_expectation_suite(discard_failed_expectations=False)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Create Checkpoint and View Results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "checkpoint = context.add_or_update_checkpoint(\n",
+    "    name=\"agora-test-checkpoint\",\n",
+    "    validator=validator,\n",
+    ")\n",
+    "checkpoint_result = checkpoint.run()\n",
+    "context.view_validation_result(checkpoint_result)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Build Data Docs - Click on Expectation Suite to View All Expectations"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "context.build_data_docs()\n",
+    "context.open_data_docs()\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/src/agoradatatools/etl/transform/__init__.py b/src/agoradatatools/etl/transform/__init__.py
index b281c8b2..ef000e81 100644
--- a/src/agoradatatools/etl/transform/__init__.py
+++ b/src/agoradatatools/etl/transform/__init__.py
@@ -15,6 +15,7 @@
     transform_rnaseq_differential_expression,
 )
 from agoradatatools.etl.transform.team_info import transform_team_info
+from agoradatatools.etl.transform.proteomics import transform_proteomics
 
 __all__ = [
     "transform_distribution_data",
@@ -26,4 +27,5 @@
     "transform_rna_distribution_data",
     "transform_rnaseq_differential_expression",
     "transform_team_info",
+    "transform_proteomics",
 ]
diff --git a/src/agoradatatools/etl/transform/gene_info.py b/src/agoradatatools/etl/transform/gene_info.py
index 55ede085..a812a89e 100644
--- a/src/agoradatatools/etl/transform/gene_info.py
+++ b/src/agoradatatools/etl/transform/gene_info.py
@@ -2,6 +2,7 @@
 import pandas as pd
 
 from agoradatatools.etl.utils import nest_fields
+from agoradatatools.etl import transform
 
 
 def transform_gene_info(
@@ -14,10 +15,10 @@ def transform_gene_info(
     gene_metadata = datasets["gene_metadata"]
     igap = datasets["igap"]
     eqtl = datasets["eqtl"]
-    proteomics = datasets["proteomics"]
+    proteomics = transform.transform_proteomics(df=datasets["proteomics"])
     rna_change = datasets["diff_exp_data"]
-    proteomics_tmt = datasets["proteomics_tmt"]
-    proteomics_srm = datasets["proteomics_srm"]
+    proteomics_tmt = transform.transform_proteomics(df=datasets["proteomics_tmt"])
+    proteomics_srm = transform.transform_proteomics(df=datasets["proteomics_srm"])
     target_list = datasets["target_list"]
     median_expression = datasets["median_expression"]
     druggability = datasets["druggability"]
diff --git a/src/agoradatatools/etl/transform/proteomics.py b/src/agoradatatools/etl/transform/proteomics.py
new file mode 100644
index 00000000..cb6d1e5b
--- /dev/null
+++ b/src/agoradatatools/etl/transform/proteomics.py
@@ -0,0 +1,25 @@
+"""Function for transforming proteomics data. This function is called on all three proteomics
+data sets, although currently it only affects the LFQ data set as it is the only one with "CON__"
+entries.
+"""
+
+import pandas as pd
+
+
+def transform_proteomics(df: pd.DataFrame) -> pd.DataFrame:
+    """Filters out rows that have "CON__" in their uniqid. This label indicates that the protein
+    is a known contaminant and should be removed from the final data set. Rows with an NA uniqid
+    are also removed.
+
+    Args:
+        df (pd.DataFrame): pandas DataFrame containing proteomics data. Must contain a column
+                           called "uniqid".
+
+    Returns:
+        pd.DataFrame: a new DataFrame containing only the rows of the input whose uniqid is
+                      present and does not contain "CON__". The input is left unmodified.
+    """
+    # "na=True" marks rows with NA uniqids for removal. Rows are filtered with a positional
+    # mask rather than df.drop(labels), which would also drop kept rows sharing an index label.
+    remove_rows = df["uniqid"].str.contains("CON__", regex=False, na=True)
+    return df.loc[~remove_rows.to_numpy(dtype=bool)].copy()
diff --git a/src/agoradatatools/etl/transform/proteomics_distribution.py b/src/agoradatatools/etl/transform/proteomics_distribution.py
index c05c08fe..6c4f3d85 100644
--- a/src/agoradatatools/etl/transform/proteomics_distribution.py
+++ b/src/agoradatatools/etl/transform/proteomics_distribution.py
@@ -1,6 +1,6 @@
 import pandas as pd
 
-from agoradatatools.etl import utils
+from agoradatatools.etl import utils, transform
 
 
 def transform_proteomics_distribution_data(datasets: dict) -> pd.DataFrame:
@@ -18,6 +18,9 @@ def transform_proteomics_distribution_data(datasets: dict) -> pd.DataFrame:
     """
     transformed = []
     for name, dataset in datasets.items():
+        # Remove contaminant ("CON__") entries and rows with NA uniqids before calculating distribution
+        dataset = transform.transform_proteomics(df=dataset)
+
         df = utils.calculate_distribution(
             df=dataset, grouping="tissue", distribution_column="log2_fc"
         )
diff --git a/src/agoradatatools/great_expectations/gx/expectations/rna_distribution_data.json b/src/agoradatatools/great_expectations/gx/expectations/rna_distribution_data.json
new file mode 100644
index 00000000..6607fb37
--- /dev/null
+++ b/src/agoradatatools/great_expectations/gx/expectations/rna_distribution_data.json
@@ -0,0 +1,178 @@
+{
+  "data_asset_type": null,
+  "expectation_suite_name": "rna_distribution_data",
+  "expectations": [
+    {
+      "expectation_type": "expect_column_values_to_be_of_type",
+      "kwargs": {
+        "column": "model",
+        "type_": "str"
+      },
+      "meta": {}
+    },
+    {
+      "expectation_type": "expect_column_values_to_not_be_null",
+      "kwargs": {
+        "column": "model"
+      },
+      "meta": {}
+    },
+    {
+      "expectation_type": "expect_column_values_to_be_in_set",
+      "kwargs": {
+        "column": "model",
+        "value_set": [
+          "AD Diagnosis (males and females)",
+          "AD Diagnosis x AOD (males and females)",
+          "AD Diagnosis x Sex (females only)",
+          "AD Diagnosis x Sex (males only)"
+        ]
+      },
+      "meta": {}
+    },
+    {
+      "expectation_type": "expect_column_values_to_be_of_type",
+      "kwargs": {
+        "column": "tissue",
+        "type_": "str"
+      },
+      "meta": {}
+    },
+    {
+      "expectation_type": "expect_column_values_to_not_be_null",
+      "kwargs": {
+        "column": "tissue"
+      },
+      "meta": {}
+    },
+    {
+      "expectation_type": "expect_column_values_to_be_in_set",
+      "kwargs": {
+        "column": "tissue",
+        "value_set": [
+          "CBE",
+          "DLPFC",
+          "FP",
+          "IFG",
+          "PHG",
+          "STG",
+          "TCX",
+          "ACC",
+          "PCC"
+        ]
+      },
+      "meta": {}
+    },
+    {
+      "expectation_type": "expect_column_values_to_be_of_type",
+      "kwargs": {
+        "column": "min",
+        "type_": "float"
+      },
+      "meta": {}
+    },
+    {
+      "expectation_type": "expect_column_values_to_not_be_null",
+      "kwargs": {
+        "column": "min"
+      },
+      "meta": {}
+    },
+    {
+      "expectation_type": "expect_column_values_to_be_of_type",
+      "kwargs": {
+        "column": "max",
+        "type_": "float"
+      },
+      "meta": {}
+    },
+    {
+      "expectation_type": "expect_column_values_to_not_be_null",
+      "kwargs": {
+        "column": "max"
+      },
+      "meta": {}
+    },
+    {
+      "expectation_type": "expect_column_values_to_be_of_type",
+      "kwargs": {
+        "column": "median",
+        "type_": "float"
+      },
+      "meta": {}
+    },
+    {
+      "expectation_type": "expect_column_values_to_not_be_null",
+      "kwargs": {
+        "column": "median"
+      },
+      "meta": {}
+    },
+    {
+      "expectation_type": "expect_column_values_to_be_of_type",
+      "kwargs": {
+        "column": "first_quartile",
+        "type_": "float"
+      },
+      "meta": {}
+    },
+    {
+      "expectation_type": "expect_column_values_to_not_be_null",
+      "kwargs": {
+        "column": "first_quartile"
+      },
+      "meta": {}
+    },
+    {
+      "expectation_type": "expect_column_values_to_be_of_type",
+      "kwargs": {
+        "column": "third_quartile",
+        "type_": "float"
+      },
+      "meta": {}
+    },
+    {
+      "expectation_type": "expect_column_values_to_not_be_null",
+      "kwargs": {
+        "column": "third_quartile"
+      },
+      "meta": {}
+    },
+    {
+      "expectation_type": "expect_column_pair_values_a_to_be_greater_than_b",
+      "kwargs": {
+        "column_A": "max",
+        "column_B": "third_quartile"
+      },
+      "meta": {}
+    },
+    {
+      "expectation_type": "expect_column_pair_values_a_to_be_greater_than_b",
+      "kwargs": {
+        "column_A": "third_quartile",
+        "column_B": "median"
+      },
+      "meta": {}
+    },
+    {
+      "expectation_type": "expect_column_pair_values_a_to_be_greater_than_b",
+      "kwargs": {
+        "column_A": "median",
+        "column_B": "first_quartile"
+      },
+      "meta": {}
+    },
+    {
+      "expectation_type": "expect_column_pair_values_a_to_be_greater_than_b",
+      "kwargs": {
+        "column_A": "first_quartile",
+        "column_B": "min"
+      },
+      "meta": {}
+    }
+  ],
+  "ge_cloud_id": null,
+  "meta": {
+    "great_expectations_version": "0.18.1"
+  }
+}
diff --git a/src/agoradatatools/process.py b/src/agoradatatools/process.py
index 8d66b08e..75854767 100644
--- a/src/agoradatatools/process.py
+++ b/src/agoradatatools/process.py
@@ -56,6 +56,9 @@ def apply_custom_transformations(datasets: dict, dataset_name: str, dataset_obj:
         return transform.transform_rna_distribution_data(datasets=datasets)
     if dataset_name == "proteomics_distribution_data":
         return transform.transform_proteomics_distribution_data(datasets=datasets)
+    if dataset_name in ["proteomics", "proteomics_tmt", "proteomics_srm"]:
+        df = datasets[dataset_name]
+        return transform.transform_proteomics(df=df)
     else:
         return None
 
@@ -127,7 +130,7 @@ def process_dataset(
             filename=dataset_name + "." + dataset_obj[dataset_name]["final_format"],
         )
 
-    gx_enabled = "gx_enabled" in dataset_obj[dataset_name].keys()
+    gx_enabled = dataset_obj[dataset_name].get("gx_enabled", False)
 
     if gx_enabled:
         gx_runner = GreatExpectationsRunner(
diff --git a/test_config.yaml b/test_config.yaml
index 53860100..cc1f29a6 100644
--- a/test_config.yaml
+++ b/test_config.yaml
@@ -20,10 +20,10 @@ sources:
   - agora_proteomics:
     agora_proteomics_files: &agora_proteomics_files
       - name: proteomics
-        id: syn18689335.3
+        id: syn18689335.4
         format: csv
     agora_proteomics_provenance: &agora_proteomics_provenance
-      - syn18689335.3
+      - syn18689335.4
   - agora_proteomics_tmt:
     agora_proteomics_tmt_files: &agora_proteomics_tmt_files
       - name: proteomics_tmt
@@ -89,6 +89,7 @@ datasets:
   - proteomics:
       files: *agora_proteomics_files
       final_format: json
+      custom_transformations: 1
       provenance: *agora_proteomics_provenance
       column_rename:
         genename: hgnc_symbol
@@ -99,6 +100,7 @@ datasets:
   - proteomics_tmt:
       files: *agora_proteomics_tmt_files
       final_format: json
+      custom_transformations: 1
       provenance: *agora_proteomics_tmt_provenance
       column_rename:
         genename: hgnc_symbol
@@ -109,6 +111,7 @@ datasets:
   - proteomics_srm:
       files: *agora_proteomics_srm_files
       final_format: json
+      custom_transformations: 1
       provenance: *agora_proteomics_srm_provenance
       column_rename:
         genename: hgnc_symbol
@@ -290,6 +293,7 @@ datasets:
       custom_transformations: 1
       provenance: *rna_diff_expr_data_provenance
       destination: *dest
+      gx_enabled: true
 
   - proteomics_distribution_data:
       files:
diff --git a/tests/test_assets/gene_info/input/proteomics_good_input.csv b/tests/test_assets/gene_info/input/proteomics_good_input.csv
index 7071f5a2..08c9fd1a 100644
--- a/tests/test_assets/gene_info/input/proteomics_good_input.csv
+++ b/tests/test_assets/gene_info/input/proteomics_good_input.csv
@@ -15,3 +15,4 @@ DPM1|O60762,DPM1,O60762,ENSG00000000419,AntPFC,0.052,0.215,-0.111,0.734,1.0
 GCLC|P48506,,P48506,ENSG00000001084,AntPFC,-0.023,0.077,-0.123,0.848,1.0
 CFH|P08603,CFH,P08603,ENSG00000000971,AntPFC,,,,,
 ,CYP51A1,Q16850,ENSG00000001630,AntPFC,0.265,0.567,-0.037,0.099,0.565
+CON__P35908,CON__P35908,P35908,ENSG00000172867,DLPFC,-0.096819,0.16057,-0.354207,0.764535,1.0
diff --git a/tests/test_assets/proteomics/input/proteomics_lfq_good_input.csv b/tests/test_assets/proteomics/input/proteomics_lfq_good_input.csv
new file mode 100644
index 00000000..589cc137
--- /dev/null
+++ b/tests/test_assets/proteomics/input/proteomics_lfq_good_input.csv
@@ -0,0 +1,7 @@
+uniqid,genename,uniprotid,ensg,tissue,log2_fc,ci_upr,ci_lwr,pval,cor_pval
+ACAT1|P24752,ACAT1,P24752,ENSG00000075239,DLPFC,-0.043497787,-0.001436119,-0.085559454,0.039605844,0.121519784
+CON__P35908,CON__P35908,P35908,ENSG00000172867,MFG,-0.096819,0.16057,-0.354207,0.764535,1.0
+MOGS|Q13724-2,MOGS,Q13724-2,ENSG00000115275,MFG,-0.024737651,0.268741708,-0.318217011,0.977232785,0.999999988
+CPLX1|O14810,CPLX1,O14810,ENSG00000168993,TCX,-0.095310355,0.051454069,-0.242074778,0.277450316,0.793276833
+MYH14|Q7Z406,MYH14,Q7Z406,ENSG00000105357,AntPFC,0.036596903,0.187572953,-0.114379147,0.835493315,0.999999904
+CON__P35908,CON__P35908,P35908,ENSG00000172867,DLPFC,-0.096819,0.16057,-0.354207,0.764535,1.0
diff --git a/tests/test_assets/proteomics/input/proteomics_lfq_missing_input.csv b/tests/test_assets/proteomics/input/proteomics_lfq_missing_input.csv
new file mode 100644
index 00000000..035cedc3
--- /dev/null
+++ b/tests/test_assets/proteomics/input/proteomics_lfq_missing_input.csv
@@ -0,0 +1,4 @@
+uniqid,genename,uniprotid,ensg,tissue,log2_fc,ci_upr,ci_lwr,pval,cor_pval
+ACAT1|P24752,ACAT1,P24752,ENSG00000075239,DLPFC,-0.043497787,-0.001436119,-0.085559454,0.039605844,0.121519784
+,DDX39B,Q13838,ENSG00000198563,DLPFC,0.067231618,0.137345114,-0.002881877,0.065488223,0.183367023
+FKBP8|Q14318,FKBP8,Q14318,ENSG00000105701,DLPFC,-0.037274408,0.048093291,-0.122642107,0.67121033,1
diff --git a/tests/test_assets/proteomics/input/proteomics_lfq_no_uniqid_input.csv b/tests/test_assets/proteomics/input/proteomics_lfq_no_uniqid_input.csv
new file mode 100644
index 00000000..45fef0d3
--- /dev/null
+++ b/tests/test_assets/proteomics/input/proteomics_lfq_no_uniqid_input.csv
@@ -0,0 +1,4 @@
+bad_field,genename,uniprotid,ensg,tissue,log2_fc,ci_upr,ci_lwr,pval,cor_pval
+ACAT1|P24752,ACAT1,P24752,ENSG00000075239,DLPFC,-0.043497787,-0.001436119,-0.085559454,0.039605844,0.121519784
+MOGS|Q13724-2,MOGS,Q13724-2,ENSG00000115275,MFG,-0.024737651,0.268741708,-0.318217011,0.977232785,0.999999988
+FKBP8|Q14318,FKBP8,Q14318,ENSG00000105701,DLPFC,-0.037274408,0.048093291,-0.122642107,0.67121033,1
diff --git a/tests/test_assets/proteomics/input/proteomics_srm_good_input.csv b/tests/test_assets/proteomics/input/proteomics_srm_good_input.csv
new file mode 100644
index 00000000..9ba79205
--- /dev/null
+++ b/tests/test_assets/proteomics/input/proteomics_srm_good_input.csv
@@ -0,0 +1,6 @@
+uniqid,hgnc_symbol,uniprotid,ensembl_gene_id,tissue,log2_fc,ci_upr,ci_lwr,pval,cor_pval
+UQCR10|Q9UDW1,UQCR10,Q9UDW1,ENSG00000184076,DLPFC,-0.047856325401006046,-0.006795632637535513,-0.08891701816447659,0.017404524464477378,0.050609778954836496
+RUVBL1|Q9Y265,RUVBL1,Q9Y265,ENSG00000175792,DLPFC,0.05453315389554793,0.09014995830291843,0.018916349488177428,0.0009952823691273815,0.0049961119358832295
+CASS4|Q9NQ75,CASS4,Q9NQ75,ENSG00000087589,DLPFC,-0.029649429337509504,0.06993115111279105,-0.12923000978781007,0.7642063208064378,0.9998368407824682
+SLC6A12|P48065,SLC6A12,P48065,ENSG00000111181,DLPFC,0.04243291583556599,0.10994911407693496,-0.025083282405802973,0.30332095961363803,0.5210898536952243
+SPP1|P10451,SPP1,P10451,ENSG00000118785,DLPFC,0.4912046816154135,0.6957173937274684,0.2866919695033585,6.70612867459397e-08,9.97536640345853e-07
diff --git a/tests/test_assets/proteomics/input/proteomics_tmt_good_input.csv b/tests/test_assets/proteomics/input/proteomics_tmt_good_input.csv
new file mode 100644
index 00000000..efab3594
--- /dev/null
+++ b/tests/test_assets/proteomics/input/proteomics_tmt_good_input.csv
@@ -0,0 +1,6 @@
+uniqid,hgnc_symbol,uniprotid,ensembl_gene_id,tissue,log2_fc,ci_upr,ci_lwr,pval,cor_pval
+AKAP5|P24588,AKAP5,P24588,ENSG00000179841,DLPFC,-0.0509573858882245,0.0115626499396124,-0.113477421716061,0.109363318498206,0.288347297776616
+CRYZ|Q08257,CRYZ,Q08257,ENSG00000116791,DLPFC,0.0867240045720519,0.186673804841658,-0.0132257956975545,0.0884904979890596,0.253483015194782
+TRMT1|Q9NXH9,TRMT1,Q9NXH9,ENSG00000104907,DLPFC,0.0338755789687381,0.101380502490424,-0.0336293445529475,0.32282128810998,0.550613717538814
+HIKESHI|Q53FT3,HIKESHI,Q53FT3,ENSG00000149196,DLPFC,0.0235456798211915,0.0578409455829091,-0.0107495859405262,0.176890998787692,0.388261871125487
+ATP8A2|Q9NTI2,ATP8A2,Q9NTI2,ENSG00000132932,DLPFC,0.0376416492318968,0.0625255578627375,0.0127577406010561,0.0032829559268753,0.0311245402228597
diff --git a/tests/test_assets/proteomics/output/proteomics_lfq_good_output.json b/tests/test_assets/proteomics/output/proteomics_lfq_good_output.json
new file mode 100644
index 00000000..30bd4fe1
--- /dev/null
+++ b/tests/test_assets/proteomics/output/proteomics_lfq_good_output.json
@@ -0,0 +1,50 @@
+[
+  {
+    "uniqid": "ACAT1|P24752",
+    "genename": "ACAT1",
+    "uniprotid": "P24752",
+    "ensg": "ENSG00000075239",
+    "tissue": "DLPFC",
+    "log2_fc": -0.043497787,
+    "ci_upr": -0.001436119,
+    "ci_lwr": -0.085559454,
+    "pval": 0.039605844,
+    "cor_pval": 0.121519784
+  },
+  {
+    "uniqid": "MOGS|Q13724-2",
+    "genename": "MOGS",
+    "uniprotid": "Q13724-2",
+    "ensg": "ENSG00000115275",
+    "tissue": "MFG",
+    "log2_fc": -0.024737651,
+    "ci_upr": 0.268741708,
+    "ci_lwr": -0.318217011,
+    "pval": 0.977232785,
+    "cor_pval": 0.999999988
+  },
+  {
+    "uniqid": "CPLX1|O14810",
+    "genename": "CPLX1",
+    "uniprotid": "O14810",
+    "ensg": "ENSG00000168993",
+    "tissue": "TCX",
+    "log2_fc": -0.095310355,
+    "ci_upr": 0.051454069,
+    "ci_lwr": -0.242074778,
+    "pval": 0.277450316,
+    "cor_pval": 0.793276833
+  },
+  {
+    "uniqid": "MYH14|Q7Z406",
+    "genename": "MYH14",
+    "uniprotid": "Q7Z406",
+    "ensg": "ENSG00000105357",
+    "tissue": "AntPFC",
+    "log2_fc": 0.036596903,
+    "ci_upr": 0.187572953,
+    "ci_lwr": -0.114379147,
+    "pval": 0.835493315,
+    "cor_pval": 0.999999904
+  }
+]
\ No newline at end of file
diff --git a/tests/test_assets/proteomics/output/proteomics_lfq_missing_output.json b/tests/test_assets/proteomics/output/proteomics_lfq_missing_output.json
new file mode 100644
index 00000000..31424804
--- /dev/null
+++ b/tests/test_assets/proteomics/output/proteomics_lfq_missing_output.json
@@ -0,0 +1,26 @@
+[
+  {
+    "uniqid": "ACAT1|P24752",
+    "genename": "ACAT1",
+    "uniprotid": "P24752",
+    "ensg": "ENSG00000075239",
+    "tissue": "DLPFC",
+    "log2_fc": -0.043497787,
+    "ci_upr": -0.001436119,
+    "ci_lwr": -0.085559454,
+    "pval": 0.039605844,
+    "cor_pval": 0.121519784
+  },
+  {
+    "uniqid": "FKBP8|Q14318",
+    "genename": "FKBP8",
+    "uniprotid": "Q14318",
+    "ensg": "ENSG00000105701",
+    "tissue": "DLPFC",
+    "log2_fc": -0.037274408,
+    "ci_upr": 0.048093291,
+    "ci_lwr": -0.122642107,
+    "pval": 0.67121033,
+    "cor_pval": 1
+  }
+]
\ No newline at end of file
diff --git a/tests/test_assets/proteomics/output/proteomics_srm_good_output.json b/tests/test_assets/proteomics/output/proteomics_srm_good_output.json
new file mode 100644
index 00000000..5dcf6fc1
--- /dev/null
+++ b/tests/test_assets/proteomics/output/proteomics_srm_good_output.json
@@ -0,0 +1,62 @@
+[
+  {
+    "uniqid": "UQCR10|Q9UDW1",
+    "hgnc_symbol": "UQCR10",
+    "uniprotid": "Q9UDW1",
+    "ensembl_gene_id": "ENSG00000184076",
+    "tissue": "DLPFC",
+    "log2_fc": -0.047856325401006046,
+    "ci_upr": -0.006795632637535513,
+    "ci_lwr": -0.08891701816447659,
+    "pval": 0.017404524464477378,
+    "cor_pval": 0.050609778954836496
+  },
+  {
+    "uniqid": "RUVBL1|Q9Y265",
+    "hgnc_symbol": "RUVBL1",
+    "uniprotid": "Q9Y265",
+    "ensembl_gene_id": "ENSG00000175792",
+    "tissue": "DLPFC",
+    "log2_fc": 0.05453315389554793,
+    "ci_upr": 0.09014995830291843,
+    "ci_lwr": 0.018916349488177428,
+    "pval": 0.0009952823691273815,
+    "cor_pval": 0.0049961119358832295
+  },
+  {
+    "uniqid": "CASS4|Q9NQ75",
+    "hgnc_symbol": "CASS4",
+    "uniprotid": "Q9NQ75",
+    "ensembl_gene_id": "ENSG00000087589",
+    "tissue": "DLPFC",
+    "log2_fc": -0.029649429337509504,
+    "ci_upr": 0.06993115111279105,
+    "ci_lwr": -0.12923000978781007,
+    "pval": 0.7642063208064378,
+    "cor_pval": 0.9998368407824682
+  },
+  {
+    "uniqid": "SLC6A12|P48065",
+    "hgnc_symbol": "SLC6A12",
+    "uniprotid": "P48065",
+    "ensembl_gene_id": "ENSG00000111181",
+    "tissue": "DLPFC",
+    "log2_fc": 0.04243291583556599,
+    "ci_upr": 0.10994911407693496,
+    "ci_lwr": -0.025083282405802973,
+    "pval": 0.30332095961363803,
+    "cor_pval": 0.5210898536952243
+  },
+  {
+    "uniqid": "SPP1|P10451",
+    "hgnc_symbol": "SPP1",
+    "uniprotid": "P10451",
+    "ensembl_gene_id": "ENSG00000118785",
+    "tissue": "DLPFC",
+    "log2_fc": 0.4912046816154135,
+    "ci_upr": 0.6957173937274684,
+    "ci_lwr": 0.2866919695033585,
+    "pval": 6.70612867459397e-08,
+    "cor_pval": 9.97536640345853e-07
+  }
+]
\ No newline at end of file
diff --git a/tests/test_assets/proteomics/output/proteomics_tmt_good_output.json b/tests/test_assets/proteomics/output/proteomics_tmt_good_output.json
new file mode 100644
index 00000000..9862f30a
--- /dev/null
+++ b/tests/test_assets/proteomics/output/proteomics_tmt_good_output.json
@@ -0,0 +1,62 @@
+[
+  {
+    "uniqid": "AKAP5|P24588",
+    "hgnc_symbol": "AKAP5",
+    "uniprotid": "P24588",
+    "ensembl_gene_id": "ENSG00000179841",
+    "tissue": "DLPFC",
+    "log2_fc": -0.0509573858882245,
+    "ci_upr": 0.0115626499396124,
+    "ci_lwr": -0.113477421716061,
+    "pval": 0.109363318498206,
+    "cor_pval": 0.288347297776616
+  },
+  {
+    "uniqid": "CRYZ|Q08257",
+    "hgnc_symbol": "CRYZ",
+    "uniprotid": "Q08257",
+    "ensembl_gene_id": "ENSG00000116791",
+    "tissue": "DLPFC",
+    "log2_fc": 0.0867240045720519,
+    "ci_upr": 0.186673804841658,
+    "ci_lwr": -0.0132257956975545,
+    "pval": 0.0884904979890596,
+    "cor_pval": 0.253483015194782
+  },
+  {
+    "uniqid": "TRMT1|Q9NXH9",
+    "hgnc_symbol": "TRMT1",
+    "uniprotid": "Q9NXH9",
+    "ensembl_gene_id": "ENSG00000104907",
+    "tissue": "DLPFC",
+    "log2_fc": 0.0338755789687381,
+    "ci_upr": 0.101380502490424,
+    "ci_lwr": -0.0336293445529475,
+    "pval": 0.32282128810998,
+    "cor_pval": 0.550613717538814
+  },
+  {
+    "uniqid": "HIKESHI|Q53FT3",
+    "hgnc_symbol": "HIKESHI",
+    "uniprotid": "Q53FT3",
+    "ensembl_gene_id": "ENSG00000149196",
+    "tissue": "DLPFC",
+    "log2_fc": 0.0235456798211915,
+    "ci_upr": 0.0578409455829091,
+    "ci_lwr": -0.0107495859405262,
+    "pval": 0.176890998787692,
+    "cor_pval": 0.388261871125487
+  },
+  {
+    "uniqid": "ATP8A2|Q9NTI2",
+    "hgnc_symbol": "ATP8A2",
+    "uniprotid": "Q9NTI2",
+    "ensembl_gene_id": "ENSG00000132932",
+    "tissue": "DLPFC",
+    "log2_fc": 0.0376416492318968,
+    "ci_upr": 0.0625255578627375,
+    "ci_lwr": 0.0127577406010561,
+    "pval": 0.0032829559268753,
+    "cor_pval": 0.0311245402228597
+  }
+]
\ No newline at end of file
diff --git a/tests/test_assets/proteomics_distribution_data/input/test_proteomics_distribution_lfq_good_input.csv b/tests/test_assets/proteomics_distribution_data/input/test_proteomics_distribution_lfq_good_input.csv
index 0f257ccb..08c39f08 100644
--- a/tests/test_assets/proteomics_distribution_data/input/test_proteomics_distribution_lfq_good_input.csv
+++ b/tests/test_assets/proteomics_distribution_data/input/test_proteomics_distribution_lfq_good_input.csv
@@ -15,4 +15,5 @@ CPLX1|O14810,CPLX1,O14810,ENSG00000168993,TCX,-0.095310355,0.051454069,-0.242074
 KPNA1|P52294,KPNA1,P52294,ENSG00000114030,AntPFC,0.023613726,0.194829171,-0.147601719,0.943425986,0.999999904
 CRYZ|Q08257,CRYZ,Q08257,ENSG00000116791,AntPFC,-0.045658922,0.092475201,-0.183793045,0.716192618,0.999999904
 CPSF6|Q16630-3,CPSF6,Q16630-3,ENSG00000111605,AntPFC,-0.08756701,0.162699641,-0.337833662,0.688026768,0.999999904
-MYH14|Q7Z406,MYH14,Q7Z406,ENSG00000105357,AntPFC,0.036596903,0.187572953,-0.114379147,0.835493315,0.999999904
\ No newline at end of file
+MYH14|Q7Z406,MYH14,Q7Z406,ENSG00000105357,AntPFC,0.036596903,0.187572953,-0.114379147,0.835493315,0.999999904
+CON__P35908,CON__P35908,P35908,ENSG00000172867,DLPFC,-0.096819,0.16057,-0.354207,0.764535,1.0
diff --git a/tests/test_process.py b/tests/test_process.py
index 07a3bc50..7dd389d6 100644
--- a/tests/test_process.py
+++ b/tests/test_process.py
@@ -66,7 +66,17 @@ class TestProcessDataset:
         }
     }
 
-    def setup_method(self, syn):
+    dataset_object_gx_disabled = {
+        "neuropath_corr": {
+            "files": [{"name": "test_file_1", "id": "syn1111111", "format": "csv"}],
+            "final_format": "json",
+            "provenance": ["syn1111111"],
+            "destination": "syn1111113",
+            "gx_enabled": False,
+        }
+    }
+
+    def setup_method(self):
         self.patch_get_entity_as_df = patch.object(
             extract, "get_entity_as_df", return_value=pd.DataFrame
         ).start()
@@ -114,7 +124,7 @@ def teardown_method(self):
         self.patch_format_link.stop()
         mock.patch.stopall()
 
-    def test_process_dataset_upload_false_gx_disabled(self, syn: Any):
+    def test_process_dataset_upload_false_gx_not_specified(self, syn: Any):
         process.process_dataset(
             dataset_obj=self.dataset_object,
             staging_path=STAGING_PATH,
@@ -142,7 +152,9 @@ def test_process_dataset_upload_false_gx_disabled(self, syn: Any):
         self.patch_format_link.assert_not_called()
         self.patch_load.assert_not_called()
 
-    def test_process_dataset_upload_false_gx_disabled_column_rename(self, syn: Any):
+    def test_process_dataset_upload_false_gx_not_specified_column_rename(
+        self, syn: Any
+    ):
         process.process_dataset(
             dataset_obj=self.dataset_object_col_rename,
             staging_path=STAGING_PATH,
@@ -172,7 +184,7 @@ def test_process_dataset_upload_false_gx_disabled_column_rename(self, syn: Any):
         self.patch_format_link.assert_not_called()
         self.patch_load.assert_not_called()
 
-    def test_process_dataset_upload_false_gx_disabled_custom_transformations(
+    def test_process_dataset_upload_false_gx_not_specified_custom_transformations(
         self, syn: Any
     ):
         process.process_dataset(
@@ -214,7 +226,9 @@ def test_process_dataset_upload_false_gx_disabled_custom_transformations(
 
-    # This test looks like a duplicate of test_process_dataset_upload_false_gx_disabled
+    # This test looks like a duplicate of test_process_dataset_upload_false_gx_not_specified
     # but it uses the agora_rename configuration with the same util function
-    def test_process_dataset_upload_false_gx_disabled_with_agora_rename(self, syn: Any):
+    def test_process_dataset_upload_false_gx_not_specified_with_agora_rename(
+        self, syn: Any
+    ):
         process.process_dataset(
             dataset_obj=self.dataset_object_col_rename,
             staging_path=STAGING_PATH,
@@ -244,7 +258,7 @@ def test_process_dataset_upload_false_gx_disabled_with_agora_rename(self, syn: A
         self.patch_format_link.assert_not_called()
         self.patch_load.assert_not_called()
 
-    def test_process_dataset_upload_false_gx_disabled_type_dict(self, syn: Any):
+    def test_process_dataset_upload_false_gx_not_specified_type_dict(self, syn: Any):
         self.patch_standardize_values.return_value = dict()
         process.process_dataset(
             dataset_obj=self.dataset_object,
@@ -275,7 +289,7 @@ def test_process_dataset_upload_false_gx_disabled_type_dict(self, syn: Any):
 
     def test_process_dataset_upload_true_gx_disabled(self, syn: Any):
         process.process_dataset(
-            dataset_obj=self.dataset_object,
+            dataset_obj=self.dataset_object_gx_disabled,
             staging_path=STAGING_PATH,
             gx_folder=GX_FOLDER,
             syn=syn,
diff --git a/tests/transform/test_proteomics.py b/tests/transform/test_proteomics.py
new file mode 100644
index 00000000..1302257d
--- /dev/null
+++ b/tests/transform/test_proteomics.py
@@ -0,0 +1,82 @@
+"""Integration test for the proteomics transform, which is run on LFQ, TMT and SRM data.
+The transform should successfully filter out proteins that start with "CON__" and should remove
+rows that are missing a uniqid value. The only failure case for this transform is when "uniqid"
+is not a column in the data frame.
+"""
+
+import os
+
+import pandas as pd
+import pytest
+
+from agoradatatools.etl.transform import proteomics
+
+
+class TestTranformProteomics:
+    """Tests for ``proteomics.transform_proteomics``.
+    ADT currently ingests three proteomics data sets (LFQ, TMT, SRM) and runs the transform on each.
+    Only LFQ data is actually modified by the transform at present, so the TMT and SRM test data
+    should not be changed by the transform.
+    """
+
+    data_files_path = "tests/test_assets/proteomics"
+    pass_test_data = [
+        (  # pass with good data (LFQ)
+            "proteomics_lfq_good_input.csv",
+            "proteomics_lfq_good_output.json",
+        ),
+        (  # pass with missing data (LFQ)
+            "proteomics_lfq_missing_input.csv",
+            "proteomics_lfq_missing_output.json",
+        ),
+        (  # pass with good data (TMT)
+            "proteomics_tmt_good_input.csv",
+            "proteomics_tmt_good_output.json",
+        ),
+        (  # pass with good data (SRM)
+            "proteomics_srm_good_input.csv",
+            "proteomics_srm_good_output.json",
+        ),
+    ]
+    pass_test_ids = [
+        "Pass with good data (LFQ)",
+        "Pass with missing data (LFQ)",
+        "Pass with good data (TMT)",
+        "Pass with good data (SRM)",
+    ]
+    fail_test_data = [
+        "proteomics_lfq_no_uniqid_input.csv",
+    ]
+    fail_test_ids = [
+        "Fail with missing uniqid column",
+    ]
+
+    @pytest.mark.parametrize(
+        "input_file, expected_output_file", pass_test_data, ids=pass_test_ids
+    )
+    def test_transform_proteomics_should_pass(
+        self, input_file: str, expected_output_file: str
+    ) -> None:
+        """Passing conditions: "CON__" proteins and proteins with NA uniqids are removed, all
+        other rows are left intact.
+        """
+        input_df = pd.read_csv(os.path.join(self.data_files_path, "input", input_file))
+
+        # reset_index is necessary because the index values need to match the expected output, but
+        # if rows are removed from the output, the index values will differ.
+        output_df = proteomics.transform_proteomics(df=input_df).reset_index(drop=True)
+        expected_df = pd.read_json(
+            os.path.join(self.data_files_path, "output", expected_output_file),
+        )
+        pd.testing.assert_frame_equal(output_df, expected_df)
+
+    @pytest.mark.parametrize("input_file", fail_test_data, ids=fail_test_ids)
+    def test_transform_proteomics_should_fail(self, input_file: str) -> None:
+        """Failure condition: "uniqid" is not a column in the data frame.
+        This should throw a KeyError.
+        """
+        # Read the fixture outside the raises block so that only a KeyError raised by the
+        # transform itself can satisfy the assertion.
+        input_df = pd.read_csv(os.path.join(self.data_files_path, "input", input_file))
+        with pytest.raises(KeyError):
+            proteomics.transform_proteomics(df=input_df)
diff --git a/tests/transform/test_proteomics_distribution_data.py b/tests/transform/test_proteomics_distribution_data.py
index 5281e845..941c1d04 100644
--- a/tests/transform/test_proteomics_distribution_data.py
+++ b/tests/transform/test_proteomics_distribution_data.py
@@ -7,10 +7,9 @@
 
 
 # NOTE: This test's input is structured a little differently than the other transform
-# tests because we may have up to 3 input files with specific dataset names but the
-# test should work with the current 2 we support, and won't require modifying the
-# test functions to add SRM data later. This structure also lets us test what happens
-# when we input a file with an unsupported proteomics type.
+# tests because we may have up to 3 input files with specific dataset names but can have fewer, as
+# there used to be only 2 proteomics data sets. This structure also lets us test what happens when
+# we input a file with an unsupported proteomics type.
 class TestTransformProteomicsDistributionData:
     data_files_path = "tests/test_assets/proteomics_distribution_data"
     pass_test_data = [