Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: ipld: restore e2e online deal and retrievals w/ identity CIDs #9259

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ require (
github.com/ipld/go-car/v2 v2.10.1
github.com/ipld/go-codec-dagpb v1.6.0
github.com/ipld/go-ipld-prime v0.20.0
github.com/ipld/go-ipld-prime/storage/bsadapter v0.0.0-20230102063945-1a409dc236dd
github.com/ipld/go-ipld-selector-text-lite v0.0.1
github.com/ipni/go-libipni v0.0.8
github.com/ipni/index-provider v0.12.0
Expand Down
1 change: 1 addition & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -847,6 +847,7 @@ github.com/ipld/go-ipld-prime v0.20.0/go.mod h1:PzqZ/ZR981eKbgdr3y2DJYeD/8bgMawd
github.com/ipld/go-ipld-prime-proto v0.0.0-20191113031812-e32bd156a1e5/go.mod h1:gcvzoEDBjwycpXt3LBE061wT9f46szXGHAmj9uoP6fU=
github.com/ipld/go-ipld-prime/storage/bsadapter v0.0.0-20211210234204-ce2a1c70cd73/go.mod h1:2PJ0JgxyB08t0b2WKrcuqI3di0V+5n6RS/LTUJhkoxY=
github.com/ipld/go-ipld-prime/storage/bsadapter v0.0.0-20230102063945-1a409dc236dd h1:gMlw/MhNr2Wtp5RwGdsW23cs+yCuj9k2ON7i9MiJlRo=
github.com/ipld/go-ipld-prime/storage/bsadapter v0.0.0-20230102063945-1a409dc236dd/go.mod h1:wZ8hH8UxeryOs4kJEJaiui/s00hDSbE37OKsL47g+Sw=
github.com/ipld/go-ipld-selector-text-lite v0.0.1 h1:lNqFsQpBHc3p5xHob2KvEg/iM5dIFn6iw4L/Hh+kS1Y=
github.com/ipld/go-ipld-selector-text-lite v0.0.1/go.mod h1:U2CQmFb+uWzfIEF3I1arrDa5rwtj00PrpiwwCO+k1RM=
github.com/ipni/go-libipni v0.0.8 h1:0wLfZRSBG84swmZwmaLKul/iB/FlBkkl9ZcR1ub+Z+w=
Expand Down
52 changes: 47 additions & 5 deletions itests/deals_anycid_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,20 @@ import (
"testing"
"time"

bstore "github.com/ipfs/boxo/blockstore"
dag "github.com/ipfs/boxo/ipld/merkledag"
"github.com/ipfs/go-cid"
ipldcbor "github.com/ipfs/go-ipld-cbor"
format "github.com/ipfs/go-ipld-format"
"github.com/ipld/go-car"
carv2 "github.com/ipld/go-car/v2"
"github.com/ipld/go-car/v2/blockstore"
"github.com/ipld/go-ipld-prime/datamodel"
"github.com/ipld/go-ipld-prime/fluent/qp"
"github.com/ipld/go-ipld-prime/linking"
cidlink "github.com/ipld/go-ipld-prime/linking/cid"
"github.com/ipld/go-ipld-prime/node/basicnode"
"github.com/ipld/go-ipld-prime/storage/bsadapter"
selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse"
"github.com/stretchr/testify/require"

Expand Down Expand Up @@ -67,6 +75,39 @@ func TestDealRetrieveByAnyCid(t *testing.T) {
ChunkSize: 1024,
// Max links from a block to other blocks
Maxlinks: 10,
ModifyDAG: func(root cid.Cid, bs bstore.Blockstore) (newRoot cid.Cid, err error) {
lsys := cidlink.DefaultLinkSystem()
lsys.SetWriteStorage(&bsadapter.Adapter{Wrapped: bs})
lp := cidlink.LinkPrototype{Prefix: cid.Prefix{
Version: 1,
Codec: 0x71, // dag-cbor
MhType: 0x0, // identity
}}
lc := linking.LinkContext{}
str := basicnode.NewString("inline string")
strLink, err := lsys.Store(lc, lp, str)
require.NoError(t, err)
blob := basicnode.NewBytes([]byte("inline byte blob"))
blobLink, err := lsys.Store(lc, lp, blob)
require.NoError(t, err)
mapNode, err := qp.BuildMap(basicnode.Prototype.Map, 3, func(la datamodel.MapAssembler) {
qp.MapEntry(la, "my string", qp.Link(strLink))
qp.MapEntry(la, "original root", qp.Link(cidlink.Link{Cid: root}))
qp.MapEntry(la, "my blob", qp.Link(blobLink))
})
require.NoError(t, err)
mapLink, err := lsys.Store(linking.LinkContext{}, lp, mapNode)
require.NoError(t, err)
list, err := qp.BuildList(basicnode.Prototype.List, 2, func(la datamodel.ListAssembler) {
qp.ListEntry(la, qp.Link(blobLink))
qp.ListEntry(la, qp.Link(mapLink))
})
require.NoError(t, err)
listLink, err := lsys.Store(linking.LinkContext{}, lp, list)
require.NoError(t, err)

return listLink.(cidlink.Link).Cid, nil
},
}
carv1FilePath, _ := kit.CreateRandomCARv1(t, 5, 100*1024, dagOpts)
res, err := client.ClientImport(ctx, api.FileRef{Path: carv1FilePath, IsCAR: true})
Expand Down Expand Up @@ -118,12 +159,13 @@ func TestDealRetrieveByAnyCid(t *testing.T) {
info, err := client.ClientGetDealInfo(ctx, *dealCid)
require.NoError(t, err)

// Make retrievals against CIDs at different levels in the DAG
cidIndices := []int{1, 11, 27, 32, 47}
// Make retrievals against CIDs at different levels in the DAG, indices 0 and
// 1 are identity CIDs
cidIndices := []int{0, 1, 11, 27, 32, 47}
for _, val := range cidIndices {
t.Logf("performing retrieval for cid at index %d", val)

targetCid := cids[val]
t.Logf("performing retrieval for cid at index %d / cid %s", val, targetCid)

offer, err := client.ClientMinerQueryOffer(ctx, miner.ActorAddr, targetCid, &info.PieceCID)
require.NoError(t, err)
require.Empty(t, offer.Err)
Expand All @@ -142,7 +184,7 @@ func TestDealRetrieveByAnyCid(t *testing.T) {
// create CAR from original file starting at targetCid and ensure it matches the retrieved CAR file.
tmp, err := os.CreateTemp(t.TempDir(), "randcarv1")
require.NoError(t, err)
rd, err := blockstore.OpenReadOnly(carv1FilePath, blockstore.UseWholeCIDs(true))
rd, err := blockstore.OpenReadOnly(carv1FilePath, blockstore.UseWholeCIDs(true), carv2.StoreIdentityCIDs(true))
require.NoError(t, err)
err = car.NewSelectiveCar(
ctx,
Expand Down
3 changes: 3 additions & 0 deletions itests/kit/deals.go
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,9 @@ func (dh *DealHarness) PerformRetrievalWithOrder(ctx context.Context, deal *cid.
}

carFile := dh.t.TempDir() + string(os.PathSeparator) + "ret-car-" + root.String()
if len(carFile) > 250 { // identity CIDs can be long, don't max out fs limits
carFile = carFile[0:250]
}

caddr, err := dh.client.WalletDefaultAddress(ctx)
require.NoError(dh.t, err)
Expand Down
19 changes: 17 additions & 2 deletions itests/kit/files.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,12 @@ import (
dssync "github.com/ipfs/go-datastore/sync"
ipldformat "github.com/ipfs/go-ipld-format"
"github.com/ipld/go-car"
selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse"
"github.com/minio/blake2b-simd"
mh "github.com/multiformats/go-multihash"
"github.com/stretchr/testify/require"

"github.com/filecoin-project/lotus/node/config"
)

const unixfsChunkSize int64 = 1 << 10
Expand Down Expand Up @@ -68,18 +71,28 @@ func CreateRandomCARv1(t *testing.T, rseed, size int, opts ...GeneratedDAGOpts)
require.NoError(t, err)
require.EqualValues(t, n, size)

//
_, err = file.Seek(0, io.SeekStart)
require.NoError(t, err)
bs := bstore.NewBlockstore(dssync.MutexWrap(ds.NewMapDatastore()))
dagSvc := merkledag.NewDAGService(blockservice.New(bs, offline.Exchange(bs)))

root := writeUnixfsDAG(ctx, t, file, dagSvc, opts...)

if len(opts) > 0 && opts[0].ModifyDAG != nil {
root, err = opts[0].ModifyDAG(root, bs)
require.NoError(t, err)
}

// create a CARv1 file from the DAG
tmp, err := os.CreateTemp(t.TempDir(), "randcarv1")
require.NoError(t, err)
require.NoError(t, car.WriteCar(ctx, dagSvc, []cid.Cid{root}, tmp))
require.NoError(t, car.NewSelectiveCar(ctx, dagSvc.Blocks.Blockstore(),
[]car.Dag{{
Root: root,
Selector: selectorparse.CommonSelector_ExploreAllRecursively,
}},
car.MaxTraversalLinks(config.MaxTraversalLinks),
).Write(tmp))
_, err = tmp.Seek(0, io.SeekStart)
require.NoError(t, err)
hd, err := car.ReadHeader(bufio.NewReader(tmp))
Expand All @@ -94,6 +107,7 @@ func CreateRandomCARv1(t *testing.T, rseed, size int, opts ...GeneratedDAGOpts)
type GeneratedDAGOpts struct {
ChunkSize int64
Maxlinks int
ModifyDAG func(root cid.Cid, bs bstore.Blockstore) (newRoot cid.Cid, err error)
}

func writeUnixfsDAG(ctx context.Context, t *testing.T, rd io.Reader, dag ipldformat.DAGService, opts ...GeneratedDAGOpts) cid.Cid {
Expand All @@ -117,6 +131,7 @@ func writeUnixfsDAG(ctx context.Context, t *testing.T, rd io.Reader, dag ipldfor
params := ihelper.DagBuilderParams{
Maxlinks: dagOpts.Maxlinks,
RawLeaves: true,
// NOTE: InlineBuilder not recommended, we are using this to test identity CIDs
CidBuilder: cidutil.InlineBuilder{
Builder: prefix,
Limit: 126,
Expand Down
7 changes: 1 addition & 6 deletions lib/unixfs/filestore.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ import (
"github.com/ipfs/boxo/ipld/unixfs/importer/balanced"
ihelper "github.com/ipfs/boxo/ipld/unixfs/importer/helpers"
"github.com/ipfs/go-cid"
"github.com/ipfs/go-cidutil"
ipld "github.com/ipfs/go-ipld-format"
mh "github.com/multiformats/go-multihash"
"golang.org/x/xerrors"
Expand All @@ -33,11 +32,7 @@ func CidBuilder() (cid.Builder, error) {
return nil, fmt.Errorf("failed to initialize UnixFS CID Builder: %w", err)
}
prefix.MhType = DefaultHashFunction
b := cidutil.InlineBuilder{
Builder: prefix,
Limit: 126,
}
return b, nil
return prefix, nil
Comment on lines -36 to +35
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if we should include this change in this PR; Yes, there were bugs in ID cid handling, but that doesn't negate the fact that it's an useful feature.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm proposing that we don't do this in Lotus and don't really encourage it for Filecoin, especially for the online deal flow. For two reasons:

  1. In Filecoin, there's a relationship between a block and the piece it's stored in. This is complicated for identity CIDs and is a problem when we have retrieval modes where you're able to ask for an arbitrary (not necessarily deal root) "payload CID" and have to figure out what piece it's in so in order to make decisions about availability and price. Illustrated by the difficulty in dealing with identity CIDs as roots in feat: handle retrieval queries for unindexed identity payload CIDs go-fil-markets#747 (this is not really a "fix" for that problem, just a patch to make it workable, it'd not be a problem if we didn't have identity CIDs that can fit CIDs) but the same applies with non-root identity CIDs unless we make sure to store them as blocks and index them as blocks, thereby reducing their utility and increasing the complexity on all of the layers we manage. This gets even more complicated when we index the content in a piece and send that to an indexer. Should the indexer ignore them? Does the CID:Piece relationship matter at that level?
  2. The break-even cost of storing inline CIDs is pretty low with the way we're having to store them. For an online deal, we end up at only 32-bytes break-even and that's without counting the extra CARv2 index that's generated (I think the break-even for SPs with the CARv2 index is just ~20-bytes, far from the 126 max default here).

Screenshot 2022-09-05 at 8 54 58 pm

(I wrote about this in more detail over here)

So, maybe we just avoid it entirely? At least in lotus—we can't stop them entirely but we can set best practice.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. Yeah avoiding those is probably reasonable.

I wonder if we could allow them, but only for leaf nodes, where we wouldn't need to worry about traversal edge-cases.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe, but in the current form CidBuilder only gets the bytes so it'd need to decode them to look for links to determine that: https://github.com/ipfs/go-cidutil/blob/ac549eaa2ca06b13a1e5b37ee2696cd72b6481a7/inline.go#L21-L26, that's probably not very efficient.

Alternatively we could introduce an optional LeafCidBuilder into DagBuilderHelper and special-case use it when it's set for leaves: https://github.com/ipfs/go-unixfs/blob/2c23c3ea6fae3ef1b487cfc0c606a4ffc7893676/importer/helpers/dagbuilder.go#L154

}

// CreateFilestore takes a standard file whose path is src, forms a UnixFS DAG, and
Expand Down
3 changes: 2 additions & 1 deletion markets/retrievaladapter/client_blockstore.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (

bstore "github.com/ipfs/boxo/blockstore"
"github.com/ipfs/go-cid"
carv2 "github.com/ipld/go-car/v2"
"github.com/ipld/go-car/v2/blockstore"
"golang.org/x/xerrors"

Expand Down Expand Up @@ -140,7 +141,7 @@ func (c *CARBlockstoreAccessor) Get(id retrievalmarket.DealID, payloadCid retrie
}

path := c.PathFor(id)
bs, err := blockstore.OpenReadWrite(path, []cid.Cid{payloadCid}, blockstore.UseWholeCIDs(true))
bs, err := blockstore.OpenReadWrite(path, []cid.Cid{payloadCid}, blockstore.UseWholeCIDs(true), carv2.StoreIdentityCIDs(true))
if err != nil {
return nil, err
}
Expand Down