Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add an experimental feature to directly encode #254

Open
wants to merge 18 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
# 0.8.1
* Fix overlong encoding of packed "sint32" fields containing elements in
[-0x80000000, -0x40000001] or [0x40000000, 0x7FFFFFFF], which increased
message size and hindered forward compatibility of "sint32" with "sint64".
* Add an experimental feature to encode without using
intermediate data structures; see Proto3.Suite.Form and
the new compile-proto-file option --typeLevelFormat
* Fix support for dhall-1.42.
* Support dhall on GHC 9.8.
* Fix aeson upper bound in library target (was correct in test target).
* Fix default compiler version in shell.nix.
* Add --stringType as a preferred spelling of --string-type
because its style matches other options.
* Test with GHC 9.8.2 instead of GHC 9.8.1 and GHC 9.6.5 instead of GHC 9.6.2.
* Test with nixpkgs-24.05 but always using aeson-2.1.2.1.

Expand Down
34 changes: 30 additions & 4 deletions nix/overlays/haskell-packages.nix
Original file line number Diff line number Diff line change
Expand Up @@ -279,15 +279,37 @@ in {
jailbreak = true;
});

# Newer versions of "witch" do not support GHC 9.0.
witch =
if builtins.compareVersions haskellPackagesOld.ghc.version "9.2.0" < 0
then haskellPackagesNew.callPackage (
{ mkDerivation, base, bytestring, containers, HUnit, lib, tagged
, template-haskell, text, time, transformers
}:
mkDerivation {
pname = "witch";
version = "1.1.6.0";
sha256 = "e3f0879abbc22d7c674219317783438f28325e09e0b30cbc8890c936d870192e";
libraryHaskellDepends = [
base bytestring containers tagged template-haskell text time
];
testHaskellDepends = [
base bytestring containers HUnit tagged text time transformers
];
description = "Convert values from one type into another";
license = lib.licenses.mit;
}) {}
else haskellPackagesOld.witch;

# With nixpkgs-23.11 and ghc962, proto3-wire thinks
# that doctest and transformers are out of bounds.
proto3-wire =
let
source = pkgsNew.fetchFromGitHub {
owner = "awakesecurity";
repo = "proto3-wire";
rev = "b3d837f66d97f97f1ad46c5bb0f1d1bb3b7b13c1"; # 1.4.2
sha256 = "LXinRHg7fjBf9of7pDm/oWAacCwJ9x/PtnJz6S0W/FA=";
rev = "40a907da45cf3946d91ac34799749b6e92cc86d2"; # 1.4.4
sha256 = "8LgeDChLOpcLiBX/tRBmhan2o/TWBSdompfPoqBI7NQ=";
};
in
pkgsNew.haskell.lib.doJailbreak
Expand Down Expand Up @@ -353,7 +375,11 @@ in {

test-files = (gitignoreSource ../../test-files);

compile-proto-flags = if enableLargeRecords then "--largeRecords" else "";
compile-proto-flags = {
largeRecords = enableLargeRecords;
typeLevelFormat = true;
};

cg-artifacts = pkgsNew.runCommand "proto3-suite-test-cg-artifacts" { } ''
mkdir -p $out/protos

Expand All @@ -364,7 +390,7 @@ in {
build () {
echo "[proto3-suite-test-cg-artifacts] Compiling proto-file/$1"
${haskellPackagesNew.proto3-suite-boot}/bin/compile-proto-file \
${compile-proto-flags} \
${pkgsNew.lib.cli.toGNUCommandLineShell {} compile-proto-flags} \
--out $out \
--includeDir "$2" \
--proto "$1"
Expand Down
36 changes: 23 additions & 13 deletions proto3-suite.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,16 @@ library
exposed-modules: Proto3.Suite
Proto3.Suite.Class
Proto3.Suite.DotProto
Proto3.Suite.DotProto.AST
Proto3.Suite.DotProto.AST.Lens
Proto3.Suite.DotProto.Generate
Proto3.Suite.DotProto.Generate.LargeRecord
Proto3.Suite.DotProto.Generate.Record
Proto3.Suite.DotProto.Generate.Syntax
Proto3.Suite.DotProto.AST
Proto3.Suite.DotProto.AST.Lens
Proto3.Suite.DotProto.Parsing
Proto3.Suite.DotProto.Rendering
Proto3.Suite.Form
Proto3.Suite.Form.Encode
Proto3.Suite.JSONPB
Proto3.Suite.Tutorial
Proto3.Suite.Types
Expand All @@ -98,7 +100,9 @@ library
Proto3.Suite.Haskell.Parser
Proto3.Suite.JSONPB.Class

other-modules: Turtle.Compat
other-modules:
Proto3.Suite.Form.Encode.Core
Turtle.Compat

build-depends: aeson >= 1.1.1.0 && < 2.2,
aeson-pretty,
Expand All @@ -123,18 +127,19 @@ library
parsers >= 0.12 && <0.13,
pretty ==1.1.*,
pretty-show >= 1.6.12 && < 2.0,
proto3-wire >= 1.2.2 && < 1.5,
proto3-wire >= 1.4.4 && < 1.5,
QuickCheck >=2.10 && <2.15,
quickcheck-instances >=0.3.26 && < 0.4,
safe ==0.3.*,
split,
system-filepath,
time,
template-haskell >=2.17 && <2.22,
text >= 0.2 && <2.2,
text-short >=0.1.3 && <0.2,
time,
transformers >=0.4 && <0.7,
turtle < 1.6.0 || >= 1.6.1 && < 1.7.0,
vector >=0.11 && <0.14
vector >=0.11 && <0.14
hs-source-dirs: src
default-language: Haskell2010
ghc-options: -O2 -Wall -Werror
Expand All @@ -149,6 +154,8 @@ test-suite tests
gen
tests

cpp-options: -DTYPE_LEVEL_FORMAT

if flag(dhall)
other-modules: TestDhall
build-depends: dhall >=1.13 && < 1.43
Expand All @@ -168,28 +175,31 @@ test-suite tests
autogen-modules:
TestProto
TestProtoImport
TestProtoNegativeEnum
TestProtoNestedMessage
TestProtoOneof
TestProtoOneofImport
TestProtoWrappers
TestProtoNestedMessage

other-modules:
ArbitraryGeneratedTestTypes
TestCodeGen
TestProto
TestProtoImport
TestProtoOneof
TestProtoOneofImport
TestProtoWrappers
--TestProtoLeadingDot
TestProtoNegativeEnum
TestProtoNestedMessage
TestProtoOneof
TestProtoOneofImport
--TestProtoProtocPlugin
TestProtoWrappers
Test.Proto.Generate.Name
Test.Proto.Generate.Name.Gen
Test.Proto.Parse
Test.Proto.Parse.Core
Test.Proto.Parse.Gen
Test.Proto.Parse.Option
Test.Proto.ToEncoder

build-depends:
aeson >= 1.1.1.0 && < 2.2
Expand All @@ -209,7 +219,7 @@ test-suite tests
, pretty ==1.1.*
, pretty-show >= 1.6.12 && < 2.0
, proto3-suite
, proto3-wire >= 1.2 && < 1.5
, proto3-wire >= 1.4.4 && < 1.5
, QuickCheck >=2.10 && <2.15
, record-hasfield
, tasty >= 0.11 && <1.5
Expand All @@ -220,7 +230,7 @@ test-suite tests
, text-short >=0.1.3 && <0.2
, transformers >=0.4 && <0.7
, turtle
, vector >=0.11 && < 0.14
, vector >=0.11 && <0.14

ghc-options: -O2 -Wall

Expand All @@ -246,7 +256,7 @@ executable canonicalize-proto-file
, mtl >=2.2 && <2.4
, optparse-generic
, proto3-suite
, proto3-wire >= 1.2 && <1.5
, proto3-wire >= 1.4.4 && < 1.5
, range-set-list >=0.1.2 && <0.2
, system-filepath
, turtle
Expand Down
87 changes: 68 additions & 19 deletions src/Proto3/Suite/Class.hs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ module Proto3.Suite.Class
, fromB64
, coerceOver
, unsafeCoerceOver
, ZigZag(..)

-- * Documentation
, Named(..)
Expand All @@ -103,6 +104,7 @@ module Proto3.Suite.Class
import Control.Applicative
#endif
import Control.Monad
import Data.Bits (Bits, FiniteBits)
import qualified Data.ByteString as B
import qualified Data.ByteString.Base64 as B64
import qualified Data.ByteString.Lazy as BL
Expand Down Expand Up @@ -743,39 +745,26 @@ instance MessageField (PackedVec Int64) where

instance MessageField (PackedVec (Signed Int32)) where
encodeMessageField !fn =
omittingDefault (Encode.packedVarintsV zigZag fn) . coerce @_ @(Vector Int32)
omittingDefault (Encode.packedVarintsV zigZag fn) . packedvec
where
zigZag = fromIntegral . Encode.zigZagEncode
zigZag = fromIntegral @Word32 @Word64 . zigZagEncode @Int32
{-# INLINE encodeMessageField #-} -- Let 'Encode.packedVarintsV' figure out how much to inline.

decodeMessageField = decodePacked (fmap (fmap zagZig) Decode.packedVarints)
where
-- This type signature is important: `Decode.zigZagDecode` will not undo
-- `Encode.zigZagEncode` if given a signed value with the high order bit
-- set. So we don't allow GHC to infer a signed input type.
zagZig :: Word32 -> Signed Int32
zagZig = Signed . fromIntegral . Decode.zigZagDecode
decodeMessageField = decodePacked (fmap (fmap zigZagDecode) Decode.packedVarints)

protoType _ = messageField (Repeated SInt32) (Just PackedField)

instance MessageField (PackedVec (Signed Int64)) where
encodeMessageField !fn =
omittingDefault (Encode.packedVarintsV zigZag fn) . coerce @_ @(Vector Int64)
omittingDefault (Encode.packedVarintsV zigZag fn) . packedvec
where
zigZag = fromIntegral . Encode.zigZagEncode
zigZag = zigZagEncode @Int64
{-# INLINE encodeMessageField #-} -- Let 'Encode.packedVarintsV' figure out how much to inline.

decodeMessageField = decodePacked (fmap (fmap zagZig) Decode.packedVarints)
where
-- This type signature is important: `Decode.zigZagDecode` will not undo
-- `Encode.zigZagEncode` if given a signed value with the high order bit
-- set. So we don't allow GHC to infer a signed input type.
zagZig :: Word64 -> Signed Int64
zagZig = Signed . fromIntegral . Decode.zigZagDecode
decodeMessageField = decodePacked (fmap (fmap zigZagDecode) Decode.packedVarints)

protoType _ = messageField (Repeated SInt64) (Just PackedField)


instance MessageField (PackedVec (Fixed Word32)) where
encodeMessageField !fn =
omittingDefault (Encode.packedFixed32V id fn) . coerce @_ @(Vector Word32)
Expand Down Expand Up @@ -1067,3 +1056,63 @@ instance GenericMessage f => GenericMessage (M1 D t f) where
genericEncodeMessage num (M1 x) = genericEncodeMessage num x
genericDecodeMessage num = M1 <$> genericDecodeMessage num
genericDotProto _ = genericDotProto (proxy# :: Proxy# f)

class ZigZag a
where
-- | The unsigned integral type used to hold the value
-- after ZigZag encoding but before varint encoding, and
-- after varint decoding but before ZigZag decoding.
--
-- NOTE: The two integral types must have the same width both to
-- correctly encode large @sint32@ values and, during decoding,
-- to compensate for overlong encodings emitted by versions of
-- this library before v0.8.1. Those older versions incorrectly
-- sign-extended ZigZag-encoded @sint32@ values in packed fields.
type ZigZagEncoded a :: Type

-- | Importantly, the resulting unsigned integer has the same
-- width as the input type, so that any integral promotion before
-- or during varint encoding will zero-pad instead of sign-extend.
--
-- Sign extension would result in a more bulky encoding
-- and would violate the compatibility guarantee in
-- <https://protobuf.dev/programming-guides/proto3/#updating> that
-- an @sint32@ value can be decoded as if it had type @sint64@.
zigZagEncode :: Signed a -> ZigZagEncoded a
default zigZagEncode ::
(Num a, FiniteBits a, Integral a, Num (ZigZagEncoded a)) =>
Signed a -> ZigZagEncoded a
zigZagEncode = fromIntegral . Encode.zigZagEncode . signed
{-# INLINE zigZagEncode #-}

-- | Importantly, the given unsigned integer has the same width as the result type.
-- If the encoder was a version of this library before v0.8.1, and the field was
-- packed, it would have incorrectly sign-extended between the ZigZag encoding step
-- and the varint encoding step, rather than zero-padding. By narrowing before we
-- ZigZag decode, we exclude the incorrect bits.
--
-- Maintaining compatibility with versions of this library before v0.8.1
-- does have a curious side effect. When an @sint64@ value outside the
-- range of @sint32@ is decoded as type @sint32@, the sign will be
-- preserved and the magnitude decreased, rather than the more typical
-- conversion that outputs the remainder after division by @2^32@, as
-- would happen with 'fromIntegral @Int64 @Int32'. One could argue that
-- our behavior is surprising and unusual. However, both narrowings
-- lose information, and neither is supported by protobuf:
-- <https://protobuf.dev/programming-guides/dos-donts/> says,
-- "However, changing a field's message type will break
-- unless the new message is a superset of the old one."
zigZagDecode :: ZigZagEncoded a -> Signed a
default zigZagDecode ::
(Num (ZigZagEncoded a), Bits (ZigZagEncoded a), Integral (ZigZagEncoded a), Num a) =>
ZigZagEncoded a -> Signed a
zigZagDecode = Signed . fromIntegral . Decode.zigZagDecode
{-# INLINE zigZagDecode #-}

instance ZigZag Int32
where
type ZigZagEncoded Int32 = Word32

instance ZigZag Int64
where
type ZigZagEncoded Int64 = Word64
Loading
Loading