<!-- index.html -->

<!DOCTYPE html>
<!-- `lang` declares the page language for screen readers, translation tools and search engines. -->
<html lang="en">
<head>
  <!-- Document metadata. -->
  <meta charset="utf-8">
  <meta name="description"
        content="Multi-Task Text-Guided Mobile Manipulation with Visual-Force Goals">
  <meta name="keywords" content="Force, Imitation Learning, Robotics, Transformers">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>ForceSight: Text-Guided Mobile Manipulation with Visual-Force Goals</title>

  <!-- Web fonts. -->
  <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro"
        rel="stylesheet">

  <!-- Stylesheets: Bulma and its plugins, icon fonts, then the site-specific overrides. -->
  <link rel="stylesheet" href="./static/css/bulma.min.css">
  <link rel="stylesheet" href="./static/css/bulma-carousel.min.css">
  <link rel="stylesheet" href="./static/css/bulma-slider.min.css">
  <link rel="stylesheet" href="./static/css/fontawesome.all.min.css">
  <link rel="stylesheet"
        href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
  <link rel="stylesheet" href="./static/css/index.css">
  <!-- <link rel="icon" href="./static/images/favicon.svg"> -->
  <link rel="icon" href="./static/images/forcesight_logo.png">

  <!-- Scripts. The site-specific index.js is currently disabled. -->
  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
  <script defer src="./static/js/fontawesome.all.min.js"></script>
  <script src="./static/js/bulma-carousel.min.js"></script>
  <script src="./static/js/bulma-slider.min.js"></script>
  <!-- <script src="./static/js/index.js"></script> -->

  <!-- Google tag (gtag.js) -->
  <script async src="https://www.googletagmanager.com/gtag/js?id=G-4V98WMNGBD"></script>
  <script>
    window.dataLayer = window.dataLayer || [];
    function gtag(){dataLayer.push(arguments);}
    gtag('js', new Date());
    gtag('config', 'G-4V98WMNGBD');
  </script>
</head>
<body>

<!-- Title hero: paper title, venue, authors, affiliations and resource links. -->
<section class="hero">
  <div class="hero-body">
    <div class="container is-max-desktop">
      <div class="columns is-centered">
        <div class="column has-text-centered">
          <h1 class="title is-1 publication-title">ForceSight: Text-Guided Mobile Manipulation with Visual-Force Goals</h1>
          <div class="subtitle is-3 publication-venue">
            <h2 style="font-weight: bolder">ICRA 2024</h2>
          </div>
          <div class="is-size-5 publication-authors">
            <span class="author-block">
              <a href="https://jeremy-collins.github.io/">Jeremy A. Collins</a><sup>*</sup><sup>1</sup>,
            </span>
            <span class="author-block">
              <a href="https://codyhouff.github.io/">Cody Houff</a><sup>*</sup><sup>1</sup>,
            </span>
            <span class="author-block">
              <a href="https://youliangtan.github.io/">You Liang Tan</a><sup>*</sup><sup>1</sup>,
            </span>
            <span class="author-block">
              <a href="https://charliekemp.com/">Charlie C. Kemp</a><sup>1</sup>
            </span>

            <!-- Legend for the superscript markers used on the author names. -->
            <div class="is-size-5 publication-authors">
              <span class="author-block"><sup>*</sup>Equal Contribution</span>
              <span class="author-block"><sup>1</sup>Georgia Institute of Technology</span>
            </div>
          </div>

          <!-- Resource buttons. This column is nested inside the title column so the
               buttons stack underneath the author list. -->
          <div class="column has-text-centered">
            <div class="publication-links">
              <!-- PDF Link.
              <span class="link-block">
                <a href="./static/paper/ForceSight.pdf"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                      <i class="fas fa-file-pdf"></i>
                  </span>
                  <span>Paper</span>
                </a>
              </span> -->
              <!-- arXiv Link. -->
              <span class="link-block">
                <a href="https://arxiv.org/abs/2309.12312"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                      <i class="ai ai-arxiv"></i>
                  </span>
                  <span>Paper</span>
                </a>
              </span>
              <!-- Video Link. -->
              <!-- <span class="link-block">
                <a href="https://www.youtube.com/watch?v=MrKrnHhk8IA"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                      <i class="fab fa-youtube"></i>
                  </span>
                  <span>Video</span>
                </a>
              </span> -->
              <!-- Code Link. -->
              <span class="link-block">
                <a href="https://github.com/force-sight/forcesight"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                      <i class="fab fa-github"></i>
                  </span>
                  <span>Code</span>
                </a>
              </span>
              <!-- Dataset Link. -->
              <span class="link-block">
                <a href="https://1drv.ms/f/s!AjebifpxoPl5hO5bu91QCJSDizws9g?e=h9AlnZ"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                      <i class="fas fa-database"></i>
                  </span>
                  <span>Models/Dataset</span>
                </a>
              </span>
              <!-- Appendix Link. -->
              <span class="link-block">
                <a href="./static/paper/ForceSight.pdf"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                      <i class="fas fa-file-pdf"></i>
                  </span>
                  <span>Appendix (coming soon!)</span>
                </a>
              </span>
            </div>
            <!-- <div style="color: grey; font-style: italic; font-size: small;">Paper last revised: Sept 23 2023</div> -->
          </div>
        </div>
      </div>
    </div>
  </div>
</section>

<!-- <section class="hero teaser">
  <div class="container is-max-desktop">
    <div class="hero-body">
      <video id="teaser" autoplay muted loop playsinline height="100%">
        <source src="./static/videos/keys_cubicle_combined.mp4"
                type="video/mp4">
      </video>
      <h2 class="subtitle has-text-centered">
        <span class="dnerf">ForceSight</span> proposes visual-force goals for mobile manipulation, enabling a variety of robotic tasks.
      </h2>
    </div>
  </div>
</section> -->

<!-- replacing the above video with an image -->
<!-- Teaser: static headline figure with its caption. The section is closed explicitly
     so that the sections and footer that follow are its siblings, not its descendants. -->
<section class="hero teaser">
  <div class="container is-max-desktop">
    <div class="hero-body">
      <img src="./static/images/Headliner.png" class="interpolation-image is-centered" alt="ForceSight teaser"/>
      <h2 class="subtitle has-text-centered">
        <span class="dnerf">ForceSight</span> is an RGBD-adapted, text-conditioned vision transformer. Given an RGBD image and a text prompt, <span class="dnerf">ForceSight</span> produces visual-force
        goals for a mobile manipulator. Action primitives, shown below each image, are appended to the text input by a simple low-level controller.
      </h2>
    </div>
  </div>
</section>

<!-- <section class="hero is-light is-small">
  <div class="hero-body">
    <div class="container">
      <div id="results-carousel" class="carousel results-carousel">
        <div class="item item-steve">
          <video poster="" id="steve" autoplay controls muted loop playsinline height="100%">
            <source src="./static/videos/steve.mp4"
                    type="video/mp4">
          </video>
        </div>
        <div class="item item-chair-tp">
          <video poster="" id="chair-tp" autoplay controls muted loop playsinline height="100%">
            <source src="./static/videos/chair-tp.mp4"
                    type="video/mp4">
          </video>
        </div>
        <div class="item item-shiba">
          <video poster="" id="shiba" autoplay controls muted loop playsinline height="100%">
            <source src="./static/videos/shiba.mp4"
                    type="video/mp4">
          </video>
        </div>
        <div class="item item-fullbody">
          <video poster="" id="fullbody" autoplay controls muted loop playsinline height="100%">
            <source src="./static/videos/fullbody.mp4"
                    type="video/mp4">
          </video>
        </div>
        <div class="item item-blueshirt">
          <video poster="" id="blueshirt" autoplay controls muted loop playsinline height="100%">
            <source src="./static/videos/blueshirt.mp4"
                    type="video/mp4">
          </video>
        </div>
        <div class="item item-mask">
          <video poster="" id="mask" autoplay controls muted loop playsinline height="100%">
            <source src="./static/videos/mask.mp4"
                    type="video/mp4">
          </video>
        </div>
        <div class="item item-coffee">
          <video poster="" id="coffee" autoplay controls muted loop playsinline height="100%">
            <source src="./static/videos/coffee.mp4"
                    type="video/mp4">
          </video>
        </div>
        <div class="item item-toby">
          <video poster="" id="toby" autoplay controls muted loop playsinline height="100%">
            <source src="./static/videos/toby2.mp4"
                    type="video/mp4">
          </video>
        </div>
      </div>
    </div>
  </div>
</section> -->


<!-- Summary: overview video followed by the abstract paragraph. -->
<section class="section">
  <div class="container is-max-desktop">
    <div class="columns is-centered has-text-centered">
      <div class="column is-four-fifths">
        <h2 class="title is-3">Summary</h2>
        <div class="content has-text-justified">
          <!-- Disabled heading and caption for an earlier single-task clip:
          <h3 class="title is-4">"Open the drawer "</h3>
          <div class="content has-text-justified">
            <p>
              Open a drawer task using <span class="dnerf">ForceSight</span>. (Video is sped up x 4)
            </p>
          </div> -->

          <!-- Overview video; the earlier source was ./static/videos/open-drawerx4.mp4. -->
          <div class="content has-text-centered">
            <video id="replay-video" width="100%" controls muted preload playsinline>
              <source type="video/mp4" src="./static/videos/forcesight_summary.mp4">
            </video>
          </div>

          <!-- Abstract. The longer original wording is kept in the comment inside the paragraph. -->
          <p>
            <!-- Prior work has demonstrated that deep models that output kinematic keyframes enable manipulation by real robots with human-interpretable motion goals. Contact forces are critical to manipulation, yet have typically been relegated to lower-level execution during keyframe-driven manipulation. We present ForceSight, a system for multi-task, text-guided mobile manipulation with a deep model that outputs visual and force goals (visual-force keyframes) suitable for visual-force servoing. Given a single RGBD image and a text prompt as input, ForceSight's deep model outputs a visual-force goal, which can be inferred at a high enough rate to work with a moving camera. We evaluate ForceSight using an eye-in-hand RGBD camera on a mobile manipulator.  We show that by explicitly representing net applied force and grip force, ForceSight predicts forces suitable to the task, operates more effectively, and provides human-interpretable force goals. -->
            We present <span class="dnerf">ForceSight</span>, a transformer-based robotic planner that generates force-based objectives given a text input and an RGBD image, empowering mobile manipulators to plan and execute contact-rich tasks. We demonstrate the utility of <span class="dnerf">ForceSight</span> with 10 mobile manipulation tasks using an eye-in-hand RGBD camera on a mobile manipulator, successfully generalizing to novel environments and unseen object instances. We show that by explicitly predicting force goals, <span class="dnerf">ForceSight</span> predicts forces suitable to the task, operates more effectively, and provides human-interpretable force goals.
          </p>
        </div>
      </div>
    </div>

    <!-- Embedded paper video (currently disabled). -->
    <!-- <div class="columns is-centered has-text-centered">
      <div class="column is-four-fifths">
        <h2 class="title is-3">Video</h2>
        <div class="publication-video">
          <iframe src="https://www.youtube.com/embed/MrKrnHhk8IA?rel=0&amp;showinfo=0"
                  frameborder="0" allow="autoplay; encrypted-media" allowfullscreen></iframe>
        </div>
      </div>
    </div> -->
  </div>
</section>


<section class="section">
  <div class="container is-max-desktop">

      <!-- Tasks -->
      <div class="columns is-centered">
        <div class="column is-full-width">
          <!-- <h2 class="title is-3">Task Samples</h2> -->
            
          <!-- Task demo clips. They are muted and looping; `playsinline` lets them autoplay
               in place on iPhone instead of requiring fullscreen playback. `aria-label`
               gives each clip an accessible name (`alt` is not a <video> attribute). -->
          <h3 class="title is-4">"Pick up the paperclip" </h3>
          <div class="content has-text-centered">
            <video autoplay loop muted playsinline src="./static/videos/1x3_videos_paperclip.mp4" class="interpolation-image is-centered" aria-label="Demo video: pick up the paperclip"></video>
          </div>
          
          <h3 class="title is-4">"Pick up the apple" </h3>
          <div class="content has-text-centered">
            <video autoplay loop muted playsinline src="./static/videos/1x3_videos_apple.mp4" class="interpolation-image is-centered" aria-label="Demo video: pick up the apple"></video>
          </div>
          
          <h3 class="title is-4">"Open the drawer" </h3>
          <div class="content has-text-centered">
            <video autoplay loop muted playsinline src="./static/videos/1x3_videos_drawer.mp4" class="interpolation-image is-centered" aria-label="Demo video: open the drawer"></video>
          </div>
  
          <!-- <h3 class="title is-4">"Turn on the light "</h3>
          <div class="content has-text-centered">
            <video id="replay-video"
                  controls
                  muted
                  preload
                  playsinline
                  width="75%">
              <source src="./static/videos/light_atriumx4.mp4"
                      type="video/mp4">
            </video>
          </div>
  
          <h3 class="title is-4">"Place object in the hand "</h3>
          <div class="content has-text-centered">
            <video id="replay-video"
                  controls
                  muted
                  preload
                  playsinline
                  width="75%">
              <source src="./static/videos/handoverx4.mp4"
                      type="video/mp4">
            </video>
          </div>
  
          <h3 class="title is-4">"Pick up the cup "</h3>
          <div class="content has-text-centered">
              <video id="replay-video"
                    controls
                    muted
                    preload
                    playsinline
                    width="75%">
                <source src="./static/videos/pick-cupx4.mp4"
                        type="video/mp4">
              </video>
          </div>
  
          <h3 class="title is-4">"Place object in the trash "</h3>
          <div class="content has-text-centered">
                <video id="replay-video"
                      controls
                      muted
                      preload
                      playsinline
                      width="75%">
                  <source src="./static/videos/trashx4.mp4"
                          type="video/mp4">
                </video>
          </div>
              
              
          <h3 class="title is-4">"Pick up the medicine bottle "</h3>
          <div class="content has-text-centered">
                <video id="replay-video"
                      controls
                      muted
                      preload
                      playsinline
                      width="75%">
                  <source src="./static/videos/medicinex4.mp4"
                          type="video/mp4">
                </video>
          </div>  -->
  
        </div>
      </div>
      <!--/ Tasks -->

    <div class="columns is-centered">

      <!-- Visual Effects. -->
      <!-- <div class="column">
        <div class="content">
          <h2 class="title is-3">Visual Effects</h2>
          <p>
            Using <i>nerfies</i> you can create fun visual effects. This Dolly zoom effect
            would be impossible without nerfies since it would require going through a wall.
          </p>
          <video id="dollyzoom" autoplay controls muted loop playsinline height="100%">
            <source src="./static/videos/dollyzoom-stacked.mp4"
                    type="video/mp4">
          </video>
        </div>
      </div> -->
      <!--/ Visual Effects. -->

      <!-- Architecture. -->
      <div class="column">
        <h2 class="title is-3">Model Architecture</h2>
        <div class="columns is-centered">
          <div class="column content">
            <!-- Static architecture figure; an animated variant is kept disabled below. -->
            <img class="interpolation-image is-centered" alt="ForceSight architecture" src="./static/images/Architecture.png">
            <!-- <video autoplay="" loop="" muted="" src="./static/videos/architecture.mp4" class="interpolation-image is-centered"  alt="Interpolation end reference image."></video> -->
            <p>
              <span class="dnerf">ForceSight</span> is a text-conditioned RGBD vision transformer. An RGBD image is first divided into patches and passed into an RGBD-adapted patch encoder that transforms image patches into image tokens. These image tokens are fed into a vision transformer. After every transformer block inside the vision transformer, the visual features are conditioned on a text embedding via cross-attention to produce text-conditioned image patch features. These patch features are passed into two convolutional decoders to produce an affordance map and a depth map. The patch features are additionally average pooled and passed into several MLPs in order to predict the gripper width, applied force, grip force, and yaw.
            </p>
          </div>
        </div>
      </div>
      <!--/ Architecture. -->
    </div>

    <div class="column">
      <h2 class="title is-3">System Architecture</h2>
      <div class="columns is-centered">
        <div class="column content">
          <!-- System diagram; an animated variant is kept disabled below. -->
          <img src="./static/images/sys-archi.png" alt="ForceSight system architecture">
          <!-- <video autoplay="" loop="" muted="" src="./static/videos/high_level.mp4" class="interpolation-image is-centered"  alt="Interpolation end reference image."></video> -->
          <p>
            The <span class="dnerf">ForceSight</span> system architecture comprises several components that work together to accomplish a text-conditioned task. It begins with a high-level task planner, which takes a text input and generates a sequence of action primitives representing subgoals. These action primitives, along with the RGBD input, are then passed to the <span class="dnerf">ForceSight</span> transformer model. This model processes the input and produces force-based objectives. These objectives are subsequently fed into the low-level controller, which generates joint motion commands to reach the next goal. To determine when to switch to the next action primitive, the low-level controller compares the error between the current states and visual-force goals with a predefined threshold. If the error is below the threshold, the low-level controller initiates the switch to the next action primitive. This entire process loop operates at a frequency of 8 Hz.
          </p>
        </div>
      </div>
    </div>

    <br>

    <!-- Force vs No Force -->
    <div class="columns is-centered">
      <div class="column is-full-width">
        <h2 class="title is-3"> Visual-Force Goals (Left) vs Kinematic Goals w/o Force (Right)</h2>

        <!-- Side-by-side comparison clips. They are muted and looping; `playsinline` lets
             them autoplay in place on iPhone. `aria-label` gives each clip an accessible
             name (`alt` is not a <video> attribute). -->
        <h3 class="title is-4">"pick up the keys" </h3>
        <div class="content has-text-centered">
          <video autoplay loop muted playsinline src="./static/videos/keys_force.mp4" class="interpolation-image is-centered" aria-label="Pick up the keys: visual-force goals (left) vs kinematic goals without force (right)"></video>
        </div>

        <h3 class="title is-4">"pick up the paperclip" </h3>
        <div class="content has-text-centered">
          <video autoplay loop muted playsinline src="./static/videos/paperclip_force.mp4" class="interpolation-image is-centered" aria-label="Pick up the paperclip: visual-force goals (left) vs kinematic goals without force (right)"></video>
        </div>

        <h3 class="title is-4">"place object in the hand" </h3>
        <div class="content has-text-centered">
          <video autoplay loop muted playsinline src="./static/videos/handover_force.mp4" class="interpolation-image is-centered" aria-label="Place object in the hand: visual-force goals (left) vs kinematic goals without force (right)"></video>
        </div>

      </div>
    </div>

    <!-- Dynamic Object Handover -->

    <div class="columns is-centered">
      <div class="column is-full-width">
        <h2 class="title is-3">Dynamic Object Handover</h2>
        
        <h3 class="title is-4">"place object in the hand" </h3>
        <div class="content has-text-centered">
          <!-- Muted looping clip; `playsinline` lets it autoplay in place on iPhone.
               `aria-label` carries the description (`alt` is not a <video> attribute). -->
          <video autoplay loop muted playsinline src="./static/videos/hand_tracking.mp4" class="interpolation-image is-centered" aria-label="Dynamic object handover."></video>
        </div>
      </div>
    </div>

    <!--/ Dynamic Object Handover -->

    <!-- Continuous vs Binary Force -->
    <div class="columns is-centered">
      <div class="column is-full-width">
        <h2 class="title is-3">Continuous Grip Force (Left) vs Binary Gripper Position (Right)</h2>
        
        <h3 class="title is-4">"pick up the cup" </h3>
        <div class="content has-text-centered">
          <!-- Muted looping clip; `playsinline` lets it autoplay in place on iPhone.
               `aria-label` carries the description (`alt` is not a <video> attribute). -->
          <video autoplay loop muted playsinline src="./static/videos/cup_force.mp4" class="interpolation-image is-centered" aria-label="Pick up the cup: continuous grip force (left) vs binary gripper position (right)"></video>
        </div>

      </div>
    </div>

   <!-- Emergent Properties: generalization, agent-agnostic predictions, multi-step prediction. -->

    <div class="columns is-centered">
      <div class="column is-full-width">
        <h2 class="title is-3">Emergent Properties</h2>
        <h3 class="title is-4">Generalization</h3>
        <div class="content has-text-centered">
          <img class="interpolation-image is-centered" alt="Left:train, right:test" src="./static/images/train_test.png">
          <p>
            <span class="dnerf">ForceSight</span> is able to generalize to novel environments and unseen object instances. <br>
            <b>Left:</b> Objects in the training set. <b>Right:</b> Objects in the test set.
          </p>
        </div>
      </div>
    </div>

    <br>

    <div class="columns is-centered">
      <div class="column is-full-width">
        <h3 class="title is-4">Agent Agnostic</h3>

        <div class="content has-text-centered">
          <img class="interpolation-image is-centered" alt="ForceSight is agent-agnostic" src="./static/images/agent_agnostic.png">
          <p>
            Predictions from <span class="dnerf">ForceSight</span> are agnostic to the agent and camera perspective, as shown in this example for the apple grasping task.
          </p>
        </div>
      </div>
    </div>

    <br>

    <div class="columns is-centered">
      <div class="column is-full-width">
        <h3 class="title is-4">Multi-step prediction</h3>

        <div class="content has-text-centered">
          <!-- This figure is explicitly narrowed to 75% of the column width. -->
          <img class="interpolation-image is-centered" style="width: 75%;" alt="Multistep prediction" src="./static/images/multistep_prediction.png">
          <p>
          <span class="dnerf">ForceSight</span> is able to make reasonable predictions for action primitives that are more than one keyframe into the future, despite having been trained to predict goals associated with only the next keyframe.
          </p>
        </div>
      </div>
    </div>

  <!-- Assigning action primitives with an LLM: a single figure with its caption. -->

    <div class="columns is-centered">
      <div class="column is-full-width">
        <h2 class="title is-3">Assigning action primitives with an LLM</h2>

        <div class="content has-text-centered">
          <img class="interpolation-image is-centered" alt="GPT-4 output" src="./static/images/gpt4_output.png">
          <p>
            We demonstrate how a large language model (GPT-4) could plausibly be used to assign action primitives to task descriptions.
          </p>
        </div>
      </div>
    </div>
   
    <!-- / Assigning action primitives with an LLM -->
    <!-- <div class="column">
      <h2 class="title is-3">Assigning action primitives with an LLM</h2>
      <div class="columns is-full-width">
        <div class="column content">
          <img src="./static/images/gpt4_output.png" class="interpolation-image is-centered" alt="GPT-4 output"/>
          <p>
            We demonstrate how a large language model (GPT-4) could plausibly be used to assign action primitives to task descriptions.
          </p>
        </div>
      </div>
    </div> -->

    <!-- Concurrent Work. -->
    <!-- <div class="columns is-centered">
      <div class="column is-full-width">
        <h2 class="title is-3">Related Links</h2>

        <div class="content has-text-justified">
          <p>
            There's a lot of excellent work that was introduced around the same time as ours.
          </p>
          <p>
            <a href="https://arxiv.org/abs/2104.09125">Progressive Encoding for Neural Optimization</a> introduces an idea similar to our windowed position encoding for coarse-to-fine optimization.
          </p>
          <p>
            <a href="https://www.albertpumarola.com/research/D-NeRF/index.html">D-NeRF</a> and <a href="https://gvv.mpi-inf.mpg.de/projects/nonrigid_nerf/">NR-NeRF</a>
            both use deformation fields to model non-rigid scenes.
          </p>
          <p>
            Some works model videos with a NeRF by directly modulating the density, such as <a href="https://video-nerf.github.io/">Video-NeRF</a>, <a href="https://www.cs.cornell.edu/~zl548/NSFF/">NSFF</a>, and <a href="https://neural-3d-video.github.io/">DyNeRF</a>
          </p>
          <p>
            There are probably many more by the time you are reading this. Check out <a href="https://dellaert.github.io/NeRF/">Frank Dellart's survey on recent NeRF papers</a>, and <a href="https://github.com/yenchenlin/awesome-NeRF">Yen-Chen Lin's curated list of NeRF papers</a>.
          </p>
        </div>
      </div>
    </div> -->
    <!--/ Concurrent Work. -->

  </div>
</section>


<section class="section" id="BibTeX">
  <div class="container is-max-desktop content">
    <h2 class="title">BibTeX</h2>
    <!-- Citation entry. Whitespace inside <pre> is rendered as written, so the
         indentation and line breaks of the entry below appear verbatim on the page. -->
    <pre><code>@misc{collins2023forcesight,
      title={ForceSight: Text-Guided Mobile Manipulation with Visual-Force Goals}, 
      author={Jeremy A. Collins and Cody Houff and You Liang Tan and Charles C. Kemp},
      year={2023},
      eprint={2309.12312},
      archivePrefix={arXiv},
      primaryClass={cs.RO}
}
    </code></pre>
  </div>
</section>

<!-- Page footer: icon shortcuts to the PDF and the GitHub organisation, then licence and credits. -->
<footer class="footer">
  <div class="container">
    <div class="content has-text-centered">
      <!-- Icon-only links: `aria-label` supplies the accessible name that the icon alone lacks. -->
      <a class="icon-link"
         href="./static/paper/ForceSight.pdf"
         aria-label="ForceSight PDF">
        <i class="fas fa-file-pdf"></i>
      </a>
      <a class="icon-link"
         href="https://github.com/force-sight"
         aria-label="ForceSight on GitHub">
        <i class="fab fa-github"></i>
      </a>
    </div>
    <div class="columns is-centered">
      <div class="column is-8">
        <div class="content">
          <p>
            This website is licensed under a <a rel="license"
                                                href="https://creativecommons.org/licenses/by-sa/4.0/">Creative
            Commons Attribution-ShareAlike 4.0 International License</a>.
          </p>
          <p>
            This website is forked from <a
              href="https://github.com/nerfies/nerfies.github.io">Nerfies</a>.
          </p>
        </div>
      </div>
    </div>
  </div>
</footer>

</body>
</html>